From 462526e71525ca4f45ae517a8ad9eac3d7a16c11 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 29 Apr 2021 10:59:38 +0200
Subject: [PATCH 01/21] POC for Copy-on-Write

---
 pandas/core/frame.py                        |   4 +-
 pandas/core/generic.py                      |   6 +-
 pandas/core/indexing.py                     |   6 +
 pandas/core/internals/array_manager.py      |  64 +++-
 pandas/core/internals/managers.py           |  10 +-
 pandas/core/series.py                       |   5 +-
 pandas/tests/indexing/test_copy_on_write.py | 404 ++++++++++++++++++++
 7 files changed, 484 insertions(+), 15 deletions(-)
 create mode 100644 pandas/tests/indexing/test_copy_on_write.py

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index cde9364b55cb2..46a25913e1371 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4930,7 +4930,7 @@ def rename(
         index: Renamer | None = None,
         columns: Renamer | None = None,
         axis: Axis | None = None,
-        copy: bool = True,
+        copy: bool | None = None,
         inplace: bool = False,
         level: Level | None = None,
         errors: str = "ignore",
@@ -5764,7 +5764,7 @@ class    max    type
         if inplace:
             new_obj = self
         else:
-            new_obj = self.copy()
+            new_obj = self.copy(deep=None)
 
         new_index = ibase.default_index(len(new_obj))
         if level is not None:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 9d44ea947e562..f6a5fef294544 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -992,7 +992,7 @@ def rename(
         index: Renamer | None = None,
         columns: Renamer | None = None,
         axis: Axis | None = None,
-        copy: bool_t = True,
+        copy: bool_t | None = None,
         inplace: bool_t = False,
         level: Level | None = None,
         errors: str = "ignore",
@@ -3911,6 +3911,8 @@ def _check_setitem_copy(self, stacklevel=4, t="setting", force=False):
         df.iloc[0:5]['group'] = 'a'
 
         """
+        if isinstance(self._mgr, (ArrayManager, SingleArrayManager)):
+            return
         # return early if the check is not needed
         if not (force or self._is_copy):
             return
@@ -5833,7 +5835,7 @@ def astype(
         return result
 
     @final
-    def copy(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries:
+    def copy(self: FrameOrSeries, deep: bool_t | None = True) -> FrameOrSeries:
         """
         Make a copy of this object's indices and data.
 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 4f55459040bc0..e6ae642fcb495 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1869,6 +1869,12 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
         """
         pi = plane_indexer
 
+        if not getattr(self.obj._mgr, "blocks", False):
+            # ArrayManager
+            self.obj._mgr.column_setitem(loc, plane_indexer, value)
+            self.obj._clear_item_cache()
+            return
+
         ser = self.obj._ixs(loc, axis=1)
 
         # perform the equivalent of a setitem on the info axis
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 31e32b053367b..e4b95208ecad6 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -9,6 +9,7 @@
     Callable,
     TypeVar,
 )
+import weakref
 
 import numpy as np
 
@@ -121,11 +122,13 @@ class BaseArrayManager(DataManager):
 
     arrays: list[np.ndarray | ExtensionArray]
     _axes: list[Index]
+    refs: list | None
 
     def __init__(
         self,
         arrays: list[np.ndarray | ExtensionArray],
         axes: list[Index],
+        refs: list | None,
         verify_integrity: bool = True,
     ):
         raise NotImplementedError
@@ -176,6 +179,15 @@ def is_consolidated(self) -> bool:
     def _consolidate_inplace(self) -> None:
         pass
 
+    def _has_no_reference(self, i: int):
+        return (self.refs is None or self.refs[i] is None) and weakref.getweakrefcount(
+            self.arrays[i]
+        ) == 0
+
+    def _clear_reference(self, i: int):
+        if self.refs is not None:
+            self.refs[i] = None
+
     def get_dtypes(self):
         return np.array([arr.dtype for arr in self.arrays], dtype="object")
 
@@ -516,6 +528,12 @@ def copy(self: T, deep=True) -> T:
         -------
         BlockManager
         """
+        if deep is not True:
+            refs = [weakref.ref(arr) for arr in self.arrays]
+            return type(self)(
+                list(self.arrays), list(self._axes), refs, verify_integrity=False
+            )
+
         # this preserves the notion of view copying of axes
         if deep:
             # hit in e.g. tests.io.json.test_pandas
@@ -530,7 +548,11 @@ def copy_func(ax):
         if deep:
             new_arrays = [arr.copy() for arr in self.arrays]
         else:
-            new_arrays = self.arrays
+            new_arrays = list(self.arrays)
+        if deep:
+            refs = None
+        else:
+            refs = list(self.refs) if isinstance(self.refs, list) else self.refs
         return type(self)(new_arrays, new_axes)
 
     def reindex_indexer(
@@ -599,14 +621,18 @@ def _reindex_indexer(
 
         if axis == 1:
             new_arrays = []
+            refs = []
             for i in indexer:
                 if i == -1:
                     arr = self._make_na_array(
                         fill_value=fill_value, use_na_proxy=use_na_proxy
                     )
+                    ref = None
                 else:
                     arr = self.arrays[i]
+                    ref = weakref.ref(arr)
                 new_arrays.append(arr)
+                refs.append(ref)
 
         else:
             validate_indices(indexer, len(self._axes[0]))
@@ -625,11 +651,12 @@ def _reindex_indexer(
                 )
                 for arr in self.arrays
             ]
+            refs = None
 
         new_axes = list(self._axes)
         new_axes[axis] = new_axis
 
-        return type(self)(new_arrays, new_axes, verify_integrity=False)
+        return type(self)(new_arrays, new_axes, refs, verify_integrity=False)
 
     def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T:
         """
@@ -693,12 +720,14 @@ def __init__(
         self,
         arrays: list[np.ndarray | ExtensionArray],
         axes: list[Index],
+        refs: list | None = None,
         verify_integrity: bool = True,
     ):
         # Note: we are storing the axes in "_axes" in the (row, columns) order
         # which contrasts the order how it is stored in BlockManager
         self._axes = axes
         self.arrays = arrays
+        self.refs = refs
 
         if verify_integrity:
             self._axes = [ensure_index(ax) for ax in axes]
@@ -768,15 +797,19 @@ def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager:
 
         new_axes = list(self._axes)
         new_axes[axis] = new_axes[axis]._getitem_slice(slobj)
+        # TODO possible optimization is to have `refs` to be a weakref to the
+        # full ArrayManager to indicate it's referencing
+        refs = [weakref.ref(arr) for arr in self.arrays]
 
-        return type(self)(arrays, new_axes, verify_integrity=False)
+        return type(self)(arrays, new_axes, refs, verify_integrity=False)
 
     def iget(self, i: int) -> SingleArrayManager:
         """
         Return the data as a SingleArrayManager.
         """
         values = self.arrays[i]
-        return SingleArrayManager([values], [self._axes[0]])
+        ref = weakref.ref(values)
+        return SingleArrayManager([values], [self._axes[0]], [ref])
 
     def iget_values(self, i: int) -> ArrayLike:
         """
@@ -882,6 +915,8 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
         # TODO is this copy needed?
         arrays = self.arrays.copy()
         arrays.insert(loc, value)
+        if self.refs is not None:
+            self.refs.insert(loc, None)
 
         self.arrays = arrays
         self._axes[1] = new_axis
@@ -897,6 +932,15 @@ def idelete(self, indexer):
         self._axes = [self._axes[0], self._axes[1][to_keep]]
         return self
 
+    def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value):
+        if self._has_no_reference(loc):
+            self.arrays[loc][idx] = value
+        else:
+            arr = self.arrays[loc].copy()
+            arr[idx] = value
+            self.arrays[loc] = arr
+            self._clear_reference(loc)
+
     # --------------------------------------------------------------------
     # Array-wise Operation
 
@@ -1179,10 +1223,12 @@ def __init__(
         self,
         arrays: list[np.ndarray | ExtensionArray],
         axes: list[Index],
+        refs: list | None = None,
         verify_integrity: bool = True,
     ):
         self._axes = axes
         self.arrays = arrays
+        self.refs = refs
 
         if verify_integrity:
             assert len(axes) == 1
@@ -1286,8 +1332,14 @@ def apply(self, func, **kwargs):
             new_array = getattr(self.array, func)(**kwargs)
         return type(self)([new_array], self._axes)
 
-    def setitem(self, indexer, value):
-        return self.apply_with_block("setitem", indexer=indexer, value=value)
+    def setitem(self, indexer, value, inplace=False):
+        if not self._has_no_reference(0):
+            self.arrays[0] = self.arrays[0].copy()
+            self._clear_reference(0)
+        if inplace:
+            self.arrays[0][indexer] = value
+        else:
+            return self.apply_with_block("setitem", indexer=indexer, value=value)
 
     def idelete(self, indexer) -> SingleArrayManager:
         """
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 323aa45874d96..a7a980b280e4c 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -351,8 +351,12 @@ def where(self: T, other, cond, align: bool, errors: str) -> T:
             errors=errors,
         )
 
-    def setitem(self: T, indexer, value) -> T:
-        return self.apply("setitem", indexer=indexer, value=value)
+    def setitem(self: T, indexer, value, inplace=False) -> T:
+        if inplace:
+            assert self.ndim == 1
+            self._block.values[indexer] = value
+        else:
+            return self.apply("setitem", indexer=indexer, value=value)
 
     def putmask(self, mask, new, align: bool = True):
 
@@ -562,6 +566,8 @@ def copy(self: T, deep=True) -> T:
         -------
         BlockManager
         """
+        if deep is None:
+            deep = True
         # this preserves the notion of view copying of axes
         if deep:
             # hit in e.g. tests.io.json.test_pandas
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 2f45a2adbdec7..e78f54c361fed 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1064,10 +1064,9 @@ def __setitem__(self, key, value) -> None:
         try:
             self._set_with_engine(key, value)
         except (KeyError, ValueError):
-            values = self._values
             if is_integer(key) and self.index.inferred_type != "integer":
                 # positional setter
-                values[key] = value
+                self._mgr.setitem(key, value, inplace=True)
             else:
                 # GH#12862 adding a new key to the Series
                 self.loc[key] = value
@@ -1099,7 +1098,7 @@ def _set_with_engine(self, key, value) -> None:
         # error: Argument 1 to "validate_numeric_casting" has incompatible type
         # "Union[dtype, ExtensionDtype]"; expected "dtype"
         validate_numeric_casting(self.dtype, value)  # type: ignore[arg-type]
-        self._values[loc] = value
+        self._mgr.setitem(loc, value, inplace=True)
 
     def _set_with(self, key, value):
         # other: fancy integer or otherwise
diff --git a/pandas/tests/indexing/test_copy_on_write.py b/pandas/tests/indexing/test_copy_on_write.py
new file mode 100644
index 0000000000000..034d53c9e31c6
--- /dev/null
+++ b/pandas/tests/indexing/test_copy_on_write.py
@@ -0,0 +1,404 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+import pandas.core.common as com
+
+
+def test_copy(using_array_manager):
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_copy = df.copy()
+
+    # the deep copy doesn't share memory
+    assert not np.may_share_memory(df_copy["a"].values, df["a"].values)
+    if using_array_manager:
+        assert df_copy._mgr.refs is None
+
+    # mutating copy doesn't mutate original
+    df_copy.iloc[0, 0] = 0
+    assert df.iloc[0, 0] == 1
+
+    # copy of df + copy of subset
+    # normal copy -> refs are removed, no mutation of parent
+    # deep=None -> refs are still generated / kept
+    # copy=False -> refs are kept? But if we then edit it
+
+    # df = ...
+    # subset = df[1:3]
+    # subset_shallow_copy = subset.copy(deep=False)
+    # -> expected behaviour: mutating subset_shallow_copy should mutate subset
+    #    but not mutate parent df
+    #    - if we keep refs -> we copy on setitem -> subset is not mutated
+    #    - if we remove refs -> we don't copy on setitem, but then also parent df
+    #      is mutated
+    # -> disallow taking a shallow copy of a DataFrame that referencing other arrays?
+
+
+def test_copy_shallow(using_array_manager):
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_copy = df.copy(deep=False)
+
+    # the shallow copy still shares memory
+    assert np.may_share_memory(df_copy["a"].values, df["a"].values)
+    if using_array_manager:
+        assert df_copy._mgr.refs is not None
+
+    if using_array_manager:
+        # mutating shallow copy doesn't mutate original
+        df_copy.iloc[0, 0] = 0
+        assert df.iloc[0, 0] == 1
+        # mutating triggered a copy-on-write -> no longer shares memory
+        assert not np.may_share_memory(df_copy["a"].values, df["a"].values)
+        # but still shares memory for the other columns
+        assert np.may_share_memory(df_copy["b"].values, df["b"].values)
+    else:
+        # mutating shallow copy does mutate original
+        df_copy.iloc[0, 0] = 0
+        assert df.iloc[0, 0] == 0
+        # and still shares memory
+        assert np.may_share_memory(df_copy["a"].values, df["a"].values)
+
+
+# -----------------------------------------------------------------------------
+# DataFrame methods returning new DataFrame using shallow copy
+
+
+def test_reset_index(using_array_manager):
+    # Case: resetting the index (i.e. adding a new column) + mutating the
+    # resulting dataframe
+    df = pd.DataFrame(
+        {"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=[10, 11, 12]
+    )
+    df_orig = df.copy()
+    df2 = df.reset_index()
+
+    if using_array_manager:
+        # still shares memory (df2 is a shallow copy)
+        assert np.may_share_memory(df2["b"].values, df["b"].values)
+        assert np.may_share_memory(df2["c"].values, df["c"].values)
+    # mutating df2 triggers a copy-on-write for that column
+    df2.iloc[0, 2] = 0
+    assert not np.may_share_memory(df2["b"].values, df["b"].values)
+    if using_array_manager:
+        assert np.may_share_memory(df2["c"].values, df["c"].values)
+    tm.assert_frame_equal(df, df_orig)
+
+
+def test_rename_columns(using_array_manager):
+    # Case: renaming columns returns a new dataframe
+    # + afterwards modifying the result
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_orig = df.copy()
+    df2 = df.rename(columns=str.upper)
+
+    if using_array_manager:
+        assert np.may_share_memory(df2["A"].values, df["a"].values)
+    df2.iloc[0, 0] = 0
+    assert not np.may_share_memory(df2["A"].values, df["a"].values)
+    if using_array_manager:
+        assert np.may_share_memory(df2["C"].values, df["c"].values)
+    expected = pd.DataFrame({"A": [0, 2, 3], "B": [4, 5, 6], "C": [0.1, 0.2, 0.3]})
+    tm.assert_frame_equal(df2, expected)
+    tm.assert_frame_equal(df, df_orig)
+
+
+def test_rename_columns_modify_parent(using_array_manager):
+    # Case: renaming columns returns a new dataframe
+    # + afterwards modifying the original (parent) dataframe
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df2 = df.rename(columns=str.upper)
+    df2_orig = df2.copy()
+
+    if using_array_manager:
+        assert np.may_share_memory(df2["A"].values, df["a"].values)
+    df.iloc[0, 0] = 0
+    assert not np.may_share_memory(df2["A"].values, df["a"].values)
+    if using_array_manager:
+        assert np.may_share_memory(df2["C"].values, df["c"].values)
+    expected = pd.DataFrame({"a": [0, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    tm.assert_frame_equal(df, expected)
+    tm.assert_frame_equal(df2, df2_orig)
+
+
+# -----------------------------------------------------------------------------
+# Indexing operations taking subset + modifying the subset/parent
+
+
+def test_subset_column_selection(using_array_manager):
+    # Case: taking a subset of the columns of a DataFrame
+    # + afterwards modifying the subset
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_orig = df.copy()
+
+    subset = df[["a", "c"]]
+    if using_array_manager:
+        # the subset shares memory ...
+        assert np.may_share_memory(subset["a"].values, df["a"].values)
+        # ... but uses CoW when being modified
+        subset.iloc[0, 0] = 0
+    else:
+        with pd.option_context("chained_assignment", "warn"):
+            with tm.assert_produces_warning(com.SettingWithCopyWarning):
+                subset.iloc[0, 0] = 0
+
+    assert not np.may_share_memory(subset["a"].values, df["a"].values)
+
+    expected = pd.DataFrame({"a": [0, 2, 3], "c": [0.1, 0.2, 0.3]})
+    tm.assert_frame_equal(subset, expected)
+    tm.assert_frame_equal(df, df_orig)
+
+
+def test_subset_column_selection_modify_parent(using_array_manager):
+    # Case: taking a subset of the columns of a DataFrame
+    # + afterwards modifying the parent
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+
+    subset = df[["a", "c"]]
+    if using_array_manager:
+        # the subset shares memory ...
+        assert np.may_share_memory(subset["a"].values, df["a"].values)
+        # ... but parent uses CoW parent when it is modified
+    df.iloc[0, 0] = 0
+
+    assert not np.may_share_memory(subset["a"].values, df["a"].values)
+    if using_array_manager:
+        assert np.may_share_memory(subset["c"].values, df["c"].values)
+
+    expected = pd.DataFrame({"a": [1, 2, 3], "c": [0.1, 0.2, 0.3]})
+    tm.assert_frame_equal(subset, expected)
+
+
+def test_subset_row_slice(using_array_manager):
+    # Case: taking a subset of the rows of a DataFrame using a slice
+    # + afterwards modifying the subset
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_orig = df.copy()
+
+    subset = df[1:3]
+    assert np.may_share_memory(subset["a"].values, df["a"].values)
+
+    if using_array_manager:
+        subset.iloc[0, 0] = 0
+        assert not np.may_share_memory(subset["a"].values, df["a"].values)
+
+    else:
+        with pd.option_context("chained_assignment", "warn"):
+            with tm.assert_produces_warning(com.SettingWithCopyWarning):
+                subset.iloc[0, 0] = 0
+
+    expected = pd.DataFrame(
+        {"a": [0, 3], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3)
+    )
+    tm.assert_frame_equal(subset, expected)
+    if using_array_manager:
+        # original parent dataframe is not modified (CoW)
+        tm.assert_frame_equal(df, df_orig)
+    else:
+        # original parent dataframe is actually updated
+        df_orig.iloc[1, 0] = 0
+        tm.assert_frame_equal(df, df_orig)
+
+
+@pytest.mark.parametrize(
+    "indexer",
+    [slice(0, 2), np.array([True, True, False]), np.array([0, 1])],
+    ids=["slice", "mask", "array"],
+)
+def test_subset_set_with_row_indexer(indexer_si, indexer, using_array_manager):
+    # Case: setting values with a row indexer on a viewing subset
+    # subset[indexer] = value and subset.iloc[indexer] = value
+    df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]})
+    df_orig = df.copy()
+    subset = df[1:4]
+
+    if (
+        indexer_si is tm.setitem
+        and isinstance(indexer, np.ndarray)
+        and indexer.dtype == "int"
+    ):
+        pytest.skip("setitem with labels selects on columns")
+
+    if using_array_manager:
+        indexer_si(subset)[indexer] = 0
+    else:
+        with pd.option_context("chained_assignment", "warn"):
+            with tm.assert_produces_warning(com.SettingWithCopyWarning):
+                indexer_si(subset)[indexer] = 0
+
+    expected = pd.DataFrame(
+        {"a": [0, 0, 4], "b": [0, 0, 7], "c": [0.0, 0.0, 0.4]}, index=range(1, 4)
+    )
+    tm.assert_frame_equal(subset, expected)
+    if using_array_manager:
+        # original parent dataframe is not modified (CoW)
+        tm.assert_frame_equal(df, df_orig)
+    else:
+        # original parent dataframe is actually updated
+        df_orig[1:3] = 0
+        tm.assert_frame_equal(df, df_orig)
+
+
+def test_subset_set_column(using_array_manager):
+    # Case: setting a single column on a viewing subset -> subset[col] = value
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_orig = df.copy()
+    subset = df[1:3]
+
+    if using_array_manager:
+        subset["a"] = np.array([10, 11])
+    else:
+        with pd.option_context("chained_assignment", "warn"):
+            with tm.assert_produces_warning(com.SettingWithCopyWarning):
+                subset["a"] = np.array([10, 11])
+
+    expected = pd.DataFrame(
+        {"a": [10, 11], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3)
+    )
+    tm.assert_frame_equal(subset, expected)
+    if using_array_manager:
+        tm.assert_frame_equal(df, df_orig)
+    else:
+        df_orig.loc[1:2, "a"] = np.array([10, 11])
+        tm.assert_frame_equal(df, df_orig)
+
+
+def test_subset_set_columns_single_block(using_array_manager):
+    # Case: setting multiple columns on a viewing subset
+    # -> subset[[col1, col2]] = value
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
+    df_orig = df.copy()
+    subset = df[1:3]
+
+    if using_array_manager:
+        subset[["a", "c"]] = 0
+    else:
+        with pd.option_context("chained_assignment", "warn"):
+            with tm.assert_produces_warning(com.SettingWithCopyWarning):
+                subset[["a", "c"]] = 0
+
+    expected = pd.DataFrame({"a": [0, 0], "b": [5, 6], "c": [0, 0]}, index=range(1, 3))
+    tm.assert_frame_equal(subset, expected)
+    if using_array_manager:
+        tm.assert_frame_equal(df, df_orig)
+    else:
+        df_orig.loc[1:2, ["a", "c"]] = 0
+        tm.assert_frame_equal(df, df_orig)
+
+
+def test_subset_set_columns_mixed_block(using_array_manager):
+    # Case: setting multiple columns on a viewing subset
+    # -> subset[[col1, col2]] = value
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_orig = df.copy()
+    subset = df[1:3]
+
+    if using_array_manager:
+        subset[["a", "c"]] = 0
+    else:
+        with pd.option_context("chained_assignment", "warn"):
+            with tm.assert_produces_warning(com.SettingWithCopyWarning):
+                subset[["a", "c"]] = 0
+
+    expected = pd.DataFrame({"a": [0, 0], "b": [5, 6], "c": [0, 0]}, index=range(1, 3))
+    tm.assert_frame_equal(subset, expected)
+    if using_array_manager:
+        tm.assert_frame_equal(df, df_orig)
+    else:
+        # In the mixed case with BlockManager, only one of the two columns is
+        # mutated in the parent frame ..
+        df_orig.loc[1:2, ["a"]] = 0
+        tm.assert_frame_equal(df, df_orig)
+
+
+@pytest.mark.parametrize(
+    "indexer",
+    [slice("a", "b"), np.array([True, True, False]), ["a", "b"]],
+    ids=["slice", "mask", "array"],
+)
+def test_subset_set_with_column_indexer(indexer, using_array_manager):
+    # Case: setting multiple columns with a column indexer on a viewing subset
+    # -> subset.loc[:, [col1, col2]] = value
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]})
+    df_orig = df.copy()
+    subset = df[1:3]
+
+    if using_array_manager:
+        subset.loc[:, indexer] = 0
+    else:
+        with pd.option_context("chained_assignment", "warn"):
+            with tm.assert_produces_warning(com.SettingWithCopyWarning):
+                subset.loc[:, indexer] = 0
+
+    expected = pd.DataFrame(
+        {"a": [0, 0], "b": [0.0, 0.0], "c": [5, 6]}, index=range(1, 3)
+    )
+    if not using_array_manager:
+        expected["b"] = expected["b"].astype("int64")
+    tm.assert_frame_equal(subset, expected)
+    if using_array_manager:
+        tm.assert_frame_equal(df, df_orig)
+    else:
+        # In the mixed case with BlockManager, only one of the two columns is
+        # mutated in the parent frame ..
+        df_orig.loc[1:2, ["a"]] = 0
+        tm.assert_frame_equal(df, df_orig)
+
+
+# TODO add more tests modifying the parent
+
+
+# -----------------------------------------------------------------------------
+# Accessing column as Series
+
+
+def test_column_as_series(using_array_manager):
+    # Case: selecting a single column now also uses Copy-on-Write
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_orig = df.copy()
+
+    s = df["a"]
+
+    assert np.may_share_memory(s.values, df["a"].values)
+
+    if using_array_manager:
+        s[0] = 0
+    else:
+        with pd.option_context("chained_assignment", "warn"):
+            with tm.assert_produces_warning(com.SettingWithCopyWarning):
+                s[0] = 0
+
+    if using_array_manager:
+        # assert not np.may_share_memory(s.values, df["a"].values)
+        tm.assert_frame_equal(df, df_orig)
+    else:
+        df_orig.iloc[0, 0] = 0
+        tm.assert_frame_equal(df, df_orig)
+
+
+# TODO add tests for other indexing methods on the Series
+
+
+def test_dataframe_add_column_from_series():
+    # Case: adding a new column to a DataFrame from an existing column/series
+    # -> always already takes a copy on assignment
+    # (no change in behaviour here)
+    # TODO can we achieve the same behaviour with Copy-on-Write?
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
+
+    s = pd.Series([10, 11, 12])
+    df["new"] = s
+    assert not np.may_share_memory(df["new"].values, s.values)
+
+    # editing series -> doesn't modify column in frame
+    s[0] = 0
+    expected = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "new": [10, 11, 12]})
+    tm.assert_frame_equal(df, expected)
+
+    # editing column in frame -> doesn't modify series
+    df.loc[2, "new"] = 100
+    expected = pd.Series([0, 11, 12])
+    tm.assert_series_equal(s, expected)
+
+
+# TODO add tests for constructors

From 41ee2b77013d031813de45ccb0fbaaee679ff01b Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 8 Jun 2021 20:37:41 +0200
Subject: [PATCH 02/21] clean-up ArrayManager.copy implementation

---
 pandas/core/internals/array_manager.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index e4b95208ecad6..6c081ad0fb092 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -528,11 +528,9 @@ def copy(self: T, deep=True) -> T:
         -------
         BlockManager
         """
-        if deep is not True:
-            refs = [weakref.ref(arr) for arr in self.arrays]
-            return type(self)(
-                list(self.arrays), list(self._axes), refs, verify_integrity=False
-            )
+        if deep is None:
+            # use shallow copy
+            deep = False
 
         # this preserves the notion of view copying of axes
         if deep:
@@ -547,13 +545,12 @@ def copy_func(ax):
 
         if deep:
             new_arrays = [arr.copy() for arr in self.arrays]
-        else:
-            new_arrays = list(self.arrays)
-        if deep:
             refs = None
         else:
-            refs = list(self.refs) if isinstance(self.refs, list) else self.refs
-        return type(self)(new_arrays, new_axes)
+            new_arrays = list(self.arrays)
+            refs = [weakref.ref(arr) for arr in self.arrays]
+
+        return type(self)(new_arrays, new_axes, refs, verify_integrity=False)
 
     def reindex_indexer(
         self: T,

From 7a8dffc9a30b0dae3096969bd3102a118e9ac5dd Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 8 Jun 2021 21:34:18 +0200
Subject: [PATCH 03/21] add some comments / docstrings

---
 pandas/core/internals/array_manager.py | 35 +++++++++++++++++++++-----
 pandas/core/internals/managers.py      |  1 +
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 6c081ad0fb092..08adc3fb48f30 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -111,6 +111,7 @@ class BaseArrayManager(DataManager):
     ----------
     arrays : Sequence of arrays
     axes : Sequence of Index
+    refs : Sequence of weakrefs or None, optional
     verify_integrity : bool, default True
 
     """
@@ -122,13 +123,13 @@ class BaseArrayManager(DataManager):
 
     arrays: list[np.ndarray | ExtensionArray]
     _axes: list[Index]
-    refs: list | None
+    refs: list[weakref.ref | None] | None
 
     def __init__(
         self,
         arrays: list[np.ndarray | ExtensionArray],
         axes: list[Index],
-        refs: list | None,
+        refs: list[weakref.ref | None] | None,
         verify_integrity: bool = True,
     ):
         raise NotImplementedError
@@ -180,11 +181,20 @@ def _consolidate_inplace(self) -> None:
         pass
 
     def _has_no_reference(self, i: int):
+        """
+        Check for column `i` if has references.
+        (whether it references another array or is itself being referenced)
+
+        Returns True if the columns has no references.
+        """
         return (self.refs is None or self.refs[i] is None) and weakref.getweakrefcount(
             self.arrays[i]
         ) == 0
 
     def _clear_reference(self, i: int):
+        """
+        Clear any reference for column `i`.
+        """
         if self.refs is not None:
             self.refs[i] = None
 
@@ -626,6 +636,7 @@ def _reindex_indexer(
                     )
                     ref = None
                 else:
+                    # reusing full column array -> track with reference
                     arr = self.arrays[i]
                     ref = weakref.ref(arr)
                 new_arrays.append(arr)
@@ -648,6 +659,8 @@ def _reindex_indexer(
                 )
                 for arr in self.arrays
             ]
+            # selecting rows with take always creates a copy -> no need to
+            # track references to original arrays
             refs = None
 
         new_axes = list(self._axes)
@@ -717,7 +730,7 @@ def __init__(
         self,
         arrays: list[np.ndarray | ExtensionArray],
         axes: list[Index],
-        refs: list | None = None,
+        refs: list[weakref.ref | None] | None = None,
         verify_integrity: bool = True,
     ):
         # Note: we are storing the axes in "_axes" in the (row, columns) order
@@ -794,8 +807,8 @@ def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager:
 
         new_axes = list(self._axes)
         new_axes[axis] = new_axes[axis]._getitem_slice(slobj)
-        # TODO possible optimization is to have `refs` to be a weakref to the
-        # full ArrayManager to indicate it's referencing
+        # slicing results in views -> track references to original arrays
+        # TODO possible to optimizate this with single ref to the full ArrayManager?
         refs = [weakref.ref(arr) for arr in self.arrays]
 
         return type(self)(arrays, new_axes, refs, verify_integrity=False)
@@ -805,6 +818,7 @@ def iget(self, i: int) -> SingleArrayManager:
         Return the data as a SingleArrayManager.
         """
         values = self.arrays[i]
+        # getting single column array for Series -> track reference to original
         ref = weakref.ref(values)
         return SingleArrayManager([values], [self._axes[0]], [ref])
 
@@ -834,6 +848,7 @@ def iset(self, loc: int | slice | np.ndarray, value: ArrayLike):
             Positional location (already bounds checked)
         value : np.ndarray or ExtensionArray
         """
+        # TODO clear reference for item that is being overwritten
         # single column -> single integer index
         if lib.is_integer(loc):
 
@@ -913,6 +928,8 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
         arrays = self.arrays.copy()
         arrays.insert(loc, value)
         if self.refs is not None:
+            # inserted `value` is already a copy, no need to track reference
+            # TODO can we use CoW here as well?
             self.refs.insert(loc, None)
 
         self.arrays = arrays
@@ -922,6 +939,7 @@ def idelete(self, indexer):
         """
         Delete selected locations in-place (new block and array, same BlockManager)
         """
+        # TODO update refs
         to_keep = np.ones(self.shape[0], dtype=np.bool_)
         to_keep[indexer] = False
 
@@ -931,8 +949,10 @@ def idelete(self, indexer):
 
     def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value):
         if self._has_no_reference(loc):
+            # if no reference -> set array inplace
             self.arrays[loc][idx] = value
         else:
+            # otherwise perform Copy-on-Write and clear the reference
             arr = self.arrays[loc].copy()
             arr[idx] = value
             self.arrays[loc] = arr
@@ -1220,7 +1240,7 @@ def __init__(
         self,
         arrays: list[np.ndarray | ExtensionArray],
         axes: list[Index],
-        refs: list | None = None,
+        refs: list[weakref.ref | None] | None = None,
         verify_integrity: bool = True,
     ):
         self._axes = axes
@@ -1310,6 +1330,7 @@ def fast_xs(self, loc: int) -> ArrayLike:
         raise NotImplementedError("Use series._values[loc] instead")
 
     def get_slice(self, slobj: slice, axis: int = 0) -> SingleArrayManager:
+        # TODO track reference
         if axis >= self.ndim:
             raise IndexError("Requested axis not found in manager")
 
@@ -1331,6 +1352,7 @@ def apply(self, func, **kwargs):
 
     def setitem(self, indexer, value, inplace=False):
         if not self._has_no_reference(0):
+            # if being referenced -> perform Copy-on-Write and clear the reference
             self.arrays[0] = self.arrays[0].copy()
             self._clear_reference(0)
         if inplace:
@@ -1342,6 +1364,7 @@ def idelete(self, indexer) -> SingleArrayManager:
         """
         Delete selected locations in-place (new array, same ArrayManager)
         """
+        # TODO clear reference
         to_keep = np.ones(self.shape[0], dtype=np.bool_)
         to_keep[indexer] = False
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index a7a980b280e4c..f3ebf8f050821 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -567,6 +567,7 @@ def copy(self: T, deep=True) -> T:
         BlockManager
         """
         if deep is None:
+            # preserve deep copy for BlockManager with copy=None
             deep = True
         # this preserves the notion of view copying of axes
         if deep:

From 96b6d71855b7b566cb7effd40ed176f3035fbc53 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 25 Jun 2021 09:53:59 +0200
Subject: [PATCH 04/21] fix series slice + del operator

---
 pandas/core/indexing.py                     |  2 +-
 pandas/core/internals/array_manager.py      | 15 ++++-
 pandas/tests/indexing/test_copy_on_write.py | 75 +++++++++++++++++++++
 3 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index ddaad65bdaff8..01cb064b7aa47 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1894,7 +1894,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
         """
         pi = plane_indexer
 
-        if not getattr(self.obj._mgr, "blocks", False):
+        if not hasattr(self.obj._mgr, "blocks"):
             # ArrayManager
             self.obj._mgr.column_setitem(loc, plane_indexer, value)
             self.obj._clear_item_cache()
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 1fc2ca5251127..0e58996aebd42 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -767,6 +767,12 @@ def _verify_integrity(self) -> None:
                     "Passed arrays should be 1-dimensional, got array with "
                     f"{arr.ndim} dimensions instead."
                 )
+        if self.refs is not None:
+            if not len(self.refs) == n_columns:
+                raise ValueError(
+                    "Number of passed refs must equal the size of the column Index: "
+                    f"{len(self.refs)} refs vs {n_columns} columns."
+                )
 
     # --------------------------------------------------------------------
     # Indexing
@@ -937,12 +943,12 @@ def idelete(self, indexer):
         """
         Delete selected locations in-place (new block and array, same BlockManager)
         """
-        # TODO update refs
         to_keep = np.ones(self.shape[0], dtype=np.bool_)
         to_keep[indexer] = False
 
         self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]]
         self._axes = [self._axes[0], self._axes[1][to_keep]]
+        self.refs = [ref for i, ref in enumerate(self.refs) if to_keep[i]]
         return self
 
     def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value):
@@ -1339,7 +1345,9 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleArrayManager:
     def getitem_mgr(self, indexer) -> SingleArrayManager:
         new_array = self.array[indexer]
         new_index = self.index[indexer]
-        return type(self)([new_array], [new_index])
+        # TODO in theory only need to track reference if new_array is a view
+        ref = weakref.ref(self.array)
+        return type(self)([new_array], [new_index], [ref])
 
     def apply(self, func, **kwargs):
         if callable(func):
@@ -1362,12 +1370,13 @@ def idelete(self, indexer) -> SingleArrayManager:
         """
         Delete selected locations in-place (new array, same ArrayManager)
         """
-        # TODO clear reference
         to_keep = np.ones(self.shape[0], dtype=np.bool_)
         to_keep[indexer] = False
 
         self.arrays = [self.arrays[0][to_keep]]
         self._axes = [self._axes[0][to_keep]]
+        # clear reference since we are backed by new array
+        self.refs = None
         return self
 
     def _get_data_subset(self, predicate: Callable) -> SingleArrayManager:
diff --git a/pandas/tests/indexing/test_copy_on_write.py b/pandas/tests/indexing/test_copy_on_write.py
index 034d53c9e31c6..7994905f9125b 100644
--- a/pandas/tests/indexing/test_copy_on_write.py
+++ b/pandas/tests/indexing/test_copy_on_write.py
@@ -347,6 +347,81 @@ def test_subset_set_with_column_indexer(indexer, using_array_manager):
 
 # TODO add more tests modifying the parent
 
+# -----------------------------------------------------------------------------
+# Series -- Indexing operations taking subset + modifying the subset/parent
+
+
+def test_series_getitem_slice(using_array_manager):
+    # Case: taking a slice of a Series + afterwards modifying the subset
+    s = pd.Series([1, 2, 3], index=["a", "b", "c"])
+    s_orig = s.copy()
+
+    subset = s[:]
+    assert np.may_share_memory(subset.values, s.values)
+
+    subset.iloc[0] = 0
+
+    if using_array_manager:
+        assert not np.may_share_memory(subset.values, s.values)
+
+    expected = pd.Series([0, 2, 3], index=["a", "b", "c"])
+    tm.assert_series_equal(subset, expected)
+
+    if using_array_manager:
+        # original parent series is not modified (CoW)
+        tm.assert_series_equal(s, s_orig)
+    else:
+        # original parent series is actually updated
+        assert s.iloc[0] == 0
+
+
+# -----------------------------------------------------------------------------
+# del operator
+
+
+def test_del_frame(using_array_manager):
+    # Case: deleting a column with `del` on a viewing child dataframe should
+    # not modify parent + update the references
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_orig = df.copy()
+    df2 = df[:]
+
+    assert np.may_share_memory(df["a"].values, df2["a"].values)
+
+    del df2["b"]
+
+    assert np.may_share_memory(df["a"].values, df2["a"].values)
+    tm.assert_frame_equal(df, df_orig)
+    tm.assert_frame_equal(df2, df_orig[["a", "c"]])
+    df2._mgr._verify_integrity()
+
+    # TODO in theory modifying column "b" of the parent wouldn't need a CoW
+    # but the weakref is still alive and so we still perform CoW
+
+    if using_array_manager:
+        # modifying child after deleting a column still doesn't update parent
+        df2.loc[0, "a"] = 100
+        tm.assert_frame_equal(df, df_orig)
+
+
+def test_del_series(using_array_manager):
+    s = pd.Series([1, 2, 3], index=["a", "b", "c"])
+    s_orig = s.copy()
+    s2 = s[:]
+
+    assert np.may_share_memory(s.values, s2.values)
+
+    del s2["a"]
+
+    assert not np.may_share_memory(s.values, s2.values)
+    tm.assert_series_equal(s, s_orig)
+    tm.assert_series_equal(s2, s_orig[["b", "c"]])
+
+    # modifying s2 doesn't need copy on write (due to `del`, s2 is backed by new array)
+    values = s2.values
+    s2.loc["b"] = 100
+    assert values[0] == 100
+
 
 # -----------------------------------------------------------------------------
 # Accessing column as Series

From 17cedb97e92a290e30fb4b80802c91f7c18e53b5 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 25 Jun 2021 13:19:19 +0200
Subject: [PATCH 05/21] fix pickle and column_setitem with dtype changes

---
 pandas/core/internals/array_manager.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 0e58996aebd42..f169b32a0f086 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -203,6 +203,10 @@ def get_dtypes(self):
 
     # TODO setstate getstate
 
+    def __reduce__(self):
+        # to avoid pickling the refs
+        return type(self), (self.arrays, self._axes)
+
     def __repr__(self) -> str:
         output = type(self).__name__
         output += f"\nIndex: {self._axes[0]}"
@@ -948,20 +952,25 @@ def idelete(self, indexer):
 
         self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]]
         self._axes = [self._axes[0], self._axes[1][to_keep]]
-        self.refs = [ref for i, ref in enumerate(self.refs) if to_keep[i]]
+        if self.refs is not None:
+            self.refs = [ref for i, ref in enumerate(self.refs) if to_keep[i]]
         return self
 
     def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value):
         if self._has_no_reference(loc):
-            # if no reference -> set array inplace
-            self.arrays[loc][idx] = value
+            # if no reference -> set array (potentially) inplace
+            arr = self.arrays[loc]
         else:
             # otherwise perform Copy-on-Write and clear the reference
             arr = self.arrays[loc].copy()
-            arr[idx] = value
-            self.arrays[loc] = arr
             self._clear_reference(loc)
 
+        # create temporary SingleArrayManager without ref to use setitem implementation
+        mgr = SingleArrayManager([arr], [self._axes[0]])
+        new_mgr = mgr.setitem(idx, value)
+        # update existing ArrayManager in-place
+        self.arrays[loc] = new_mgr.arrays[0]
+
     # --------------------------------------------------------------------
     # Array-wise Operation
 

From f4614c2648a26b8b80982418e0d2bf1e9c1e4047 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 25 Jun 2021 19:56:35 +0200
Subject: [PATCH 06/21] fix setitem on single column for full slice + update
 frame tests

---
 pandas/core/indexing.py                      |  6 +++-
 pandas/core/internals/array_manager.py       |  5 ++-
 pandas/tests/frame/indexing/test_indexing.py | 20 +++++++----
 pandas/tests/frame/indexing/test_xs.py       | 38 +++++++++++++-------
 pandas/tests/frame/test_constructors.py      | 11 ++++--
 5 files changed, 56 insertions(+), 24 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 01cb064b7aa47..49da809babf50 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1896,7 +1896,11 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
 
         if not hasattr(self.obj._mgr, "blocks"):
             # ArrayManager
-            self.obj._mgr.column_setitem(loc, plane_indexer, value)
+            if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)):
+                arr = self.obj._sanitize_column(value)
+                self.obj._mgr.iset(loc, arr)
+            else:
+                self.obj._mgr.column_setitem(loc, plane_indexer, value)
             self.obj._clear_item_cache()
             return
 
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index f169b32a0f086..356e4ced64c06 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -960,6 +960,9 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value):
         if self._has_no_reference(loc):
             # if no reference -> set array (potentially) inplace
             arr = self.arrays[loc]
+            # TODO we should try to avoid this (indexing.py::_setitem_single_column
+            # does a copy for the BM path as well)
+            arr = arr.copy()
         else:
             # otherwise perform Copy-on-Write and clear the reference
             arr = self.arrays[loc].copy()
@@ -967,7 +970,7 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value):
 
         # create temporary SingleArrayManager without ref to use setitem implementation
         mgr = SingleArrayManager([arr], [self._axes[0]])
-        new_mgr = mgr.setitem(idx, value)
+        new_mgr = mgr.setitem((idx,), value)
         # update existing ArrayManager in-place
         self.arrays[loc] = new_mgr.arrays[0]
 
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index e2121fa2318eb..5a48252ab7d18 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -269,7 +269,7 @@ def test_setattr_column(self):
         df.foobar = 5
         assert (df.foobar == 5).all()
 
-    def test_setitem(self, float_frame):
+    def test_setitem(self, float_frame, using_array_manager):
         # not sure what else to do here
         series = float_frame["A"][::2]
         float_frame["col5"] = series
@@ -305,8 +305,12 @@ def test_setitem(self, float_frame):
         smaller = float_frame[:2]
 
         msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
-        with pytest.raises(com.SettingWithCopyError, match=msg):
+        if using_array_manager:
+            # With ArrayManager, adding a new column doesn't raise a warning
             smaller["col10"] = ["1", "2"]
+        else:
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                smaller["col10"] = ["1", "2"]
 
         assert smaller["col10"].dtype == np.object_
         assert (smaller["col10"] == ["1", "2"]).all()
@@ -1004,14 +1008,18 @@ def test_iloc_row_slice_view(self, using_array_manager):
         # setting it makes it raise/warn
         subset = df.iloc[slice(4, 8)]
 
+        exp_col = original[2].copy()
+
         msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
-        with pytest.raises(com.SettingWithCopyError, match=msg):
+        if using_array_manager:
+            # INFO(ArrayManager) doesn't modify parent
             subset[2] = 0.0
+        else:
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                subset[2] = 0.0
 
-        exp_col = original[2].copy()
-        # TODO(ArrayManager) verify it is expected that the original didn't change
-        if not using_array_manager:
             exp_col[4:8] = 0.0
+
         tm.assert_series_equal(df[2], exp_col)
 
     def test_iloc_col(self):
diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py
index ccd989e2de411..51127a6b0d05a 100644
--- a/pandas/tests/frame/indexing/test_xs.py
+++ b/pandas/tests/frame/indexing/test_xs.py
@@ -119,9 +119,7 @@ def test_xs_view(self, using_array_manager):
         if using_array_manager:
             # INFO(ArrayManager) with ArrayManager getting a row as a view is
             # not possible
-            msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
-            with pytest.raises(com.SettingWithCopyError, match=msg):
-                dm.xs(2)[:] = 20
+            dm.xs(2)[:] = 20
             assert not (dm.xs(2) == 20).any()
         else:
             dm.xs(2)[:] = 20
@@ -158,27 +156,41 @@ def test_xs_level_eq_2(self):
         result = df.xs("c", level=2)
         tm.assert_frame_equal(result, expected)
 
-    def test_xs_setting_with_copy_error(self, multiindex_dataframe_random_data):
+    def test_xs_setting_with_copy_error(
+        self, multiindex_dataframe_random_data, using_array_manager
+    ):
         # this is a copy in 0.14
         df = multiindex_dataframe_random_data
+        df_orig = df.copy()
         result = df.xs("two", level="second")
 
-        # setting this will give a SettingWithCopyError
-        # as we are trying to write a view
-        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
-        with pytest.raises(com.SettingWithCopyError, match=msg):
+        if not using_array_manager:
+            # setting this will give a SettingWithCopyError
+            # as we are trying to write a view
+            msg = "A value is trying to be set on a copy of a slice from a DataFrame"
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                result[:] = 10
+        else:
             result[:] = 10
+        tm.assert_frame_equal(df, df_orig)
 
-    def test_xs_setting_with_copy_error_multiple(self, four_level_index_dataframe):
+    def test_xs_setting_with_copy_error_multiple(
+        self, four_level_index_dataframe, using_array_manager
+    ):
         # this is a copy in 0.14
         df = four_level_index_dataframe
+        df_orig = df.copy()
         result = df.xs(("a", 4), level=["one", "four"])
 
-        # setting this will give a SettingWithCopyError
-        # as we are trying to write a view
-        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
-        with pytest.raises(com.SettingWithCopyError, match=msg):
+        if not using_array_manager:
+            # setting this will give a SettingWithCopyError
+            # as we are trying to write a view
+            msg = "A value is trying to be set on a copy of a slice from a DataFrame"
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                result[:] = 10
+        else:
             result[:] = 10
+        tm.assert_frame_equal(df, df_orig)
 
     @pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])])
     def test_xs_with_duplicates(self, key, level, multiindex_dataframe_random_data):
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 1d286e379da86..98139459fd4ac 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -240,11 +240,16 @@ def test_constructor_dtype_copy(self):
         new_df["col1"] = 200.0
         assert orig_df["col1"][0] == 1.0
 
-    def test_constructor_dtype_nocast_view_dataframe(self):
+    def test_constructor_dtype_nocast_view_dataframe(self, using_array_manager):
         df = DataFrame([[1, 2]])
         should_be_view = DataFrame(df, dtype=df[0].dtype)
-        should_be_view[0][0] = 99
-        assert df.values[0, 0] == 99
+        if using_array_manager:
+            # INFO(ArrayManager) doesn't mutate original
+            should_be_view.iloc[0, 0] = 99
+            assert df.values[0, 0] == 1
+        else:
+            should_be_view[0][0] = 99
+            assert df.values[0, 0] == 99
 
     @td.skip_array_manager_invalid_test  # TODO(ArrayManager) keep view on 2D array?
     def test_constructor_dtype_nocast_view_2d_array(self):

From 7f183de69600357278149fe83335fabf927deced Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sat, 26 Jun 2021 14:29:15 +0200
Subject: [PATCH 07/21] fix ref tracking in slicing columns

---
 pandas/core/internals/array_manager.py      |  8 +++---
 pandas/tests/indexing/test_copy_on_write.py | 30 +++++++++++++++++++--
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 356e4ced64c06..59eeb04c7f644 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -812,14 +812,16 @@ def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager:
 
         if axis == 0:
             arrays = [arr[slobj] for arr in self.arrays]
+            # slicing results in views -> track references to original arrays
+            # TODO possible to optimizate this with single ref to the full ArrayManager?
+            refs = [weakref.ref(arr) for arr in self.arrays]
         elif axis == 1:
             arrays = self.arrays[slobj]
+            # track reference to subset of column arrays
+            refs = [weakref.ref(arr) for arr in arrays]
 
         new_axes = list(self._axes)
         new_axes[axis] = new_axes[axis]._getitem_slice(slobj)
-        # slicing results in views -> track references to original arrays
-        # TODO possible to optimizate this with single ref to the full ArrayManager?
-        refs = [weakref.ref(arr) for arr in self.arrays]
 
         return type(self)(arrays, new_axes, refs, verify_integrity=False)
 
diff --git a/pandas/tests/indexing/test_copy_on_write.py b/pandas/tests/indexing/test_copy_on_write.py
index 7994905f9125b..ce68b24ca718b 100644
--- a/pandas/tests/indexing/test_copy_on_write.py
+++ b/pandas/tests/indexing/test_copy_on_write.py
@@ -176,6 +176,8 @@ def test_subset_row_slice(using_array_manager):
     df_orig = df.copy()
 
     subset = df[1:3]
+    subset._mgr._verify_integrity()
+
     assert np.may_share_memory(subset["a"].values, df["a"].values)
 
     if using_array_manager:
@@ -200,6 +202,30 @@ def test_subset_row_slice(using_array_manager):
         tm.assert_frame_equal(df, df_orig)
 
 
+def test_subset_column_slice(using_array_manager):
+    # Case: taking a subset of the columns of a DataFrame using a slice
+    # + afterwards modifying the subset
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_orig = df.copy()
+
+    subset = df.iloc[:, 1:]
+    subset._mgr._verify_integrity()
+
+    if using_array_manager:
+        assert np.may_share_memory(subset["b"].values, df["b"].values)
+
+        subset.iloc[0, 0] = 0
+        assert not np.may_share_memory(subset["b"].values, df["b"].values)
+
+    else:
+        subset.iloc[0, 0] = 0
+
+    expected = pd.DataFrame({"b": [0, 5, 6], "c": [0.1, 0.2, 0.3]})
+    tm.assert_frame_equal(subset, expected)
+    # original parent dataframe is not modified (also not for BlockManager case)
+    tm.assert_frame_equal(df, df_orig)
+
+
 @pytest.mark.parametrize(
     "indexer",
     [slice(0, 2), np.array([True, True, False]), np.array([0, 1])],
@@ -333,8 +359,8 @@ def test_subset_set_with_column_indexer(indexer, using_array_manager):
     expected = pd.DataFrame(
         {"a": [0, 0], "b": [0.0, 0.0], "c": [5, 6]}, index=range(1, 3)
     )
-    if not using_array_manager:
-        expected["b"] = expected["b"].astype("int64")
+    # TODO full row slice .loc[:, idx] update inplace instead of overwrite?
+    expected["b"] = expected["b"].astype("int64")
     tm.assert_frame_equal(subset, expected)
     if using_array_manager:
         tm.assert_frame_equal(df, df_orig)

From 693bc4f7cb1a6362fe87af596280d83a514f2f43 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 28 Jun 2021 09:41:52 +0200
Subject: [PATCH 08/21] fix series setitem -> don't update parent cache

---
 pandas/core/series.py                       |  6 ++++-
 pandas/tests/indexing/test_copy_on_write.py | 26 +++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 5b8c1cd27c189..11d1af61d032e 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1227,7 +1227,11 @@ def _maybe_update_cacher(
             if ref is None:
                 del self._cacher
             else:
-                if len(self) == len(ref):
+                # for ArrayManager with CoW, we never want to update the parent
+                # DataFrame cache if the Series changed, and always pop the cached item
+                if len(self) == len(ref) and not isinstance(
+                    self._mgr, SingleArrayManager
+                ):
                     # otherwise, either self or ref has swapped in new arrays
                     ref._maybe_cache_changed(cacher[0], self)
                 else:
diff --git a/pandas/tests/indexing/test_copy_on_write.py b/pandas/tests/indexing/test_copy_on_write.py
index ce68b24ca718b..780e9d11b107c 100644
--- a/pandas/tests/indexing/test_copy_on_write.py
+++ b/pandas/tests/indexing/test_copy_on_write.py
@@ -477,6 +477,32 @@ def test_column_as_series(using_array_manager):
         tm.assert_frame_equal(df, df_orig)
 
 
+def test_column_as_series_set_with_upcast(using_array_manager):
+    # Case: selecting a single column now also uses Copy-on-Write -> when
+    # setting a value causes an upcast, we don't need to update the parent
+    # DataFrame through the cache mechanism
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_orig = df.copy()
+
+    s = df["a"]
+    if using_array_manager:
+        s[0] = "foo"
+    else:
+        with pd.option_context("chained_assignment", "warn"):
+            with tm.assert_produces_warning(com.SettingWithCopyWarning):
+                s[0] = "foo"
+
+    expected = pd.Series(["foo", 2, 3], dtype=object, name="a")
+    tm.assert_series_equal(s, expected)
+    if using_array_manager:
+        tm.assert_frame_equal(df, df_orig)
+        # ensure cached series on getitem is not the changed series
+        tm.assert_series_equal(df["a"], df_orig["a"])
+    else:
+        df_orig["a"] = expected
+        tm.assert_frame_equal(df, df_orig)
+
+
 # TODO add tests for other indexing methods on the Series
 
 

From a154591e6976729724c2ac1e26d269803ceff97f Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 28 Jun 2021 18:26:21 +0200
Subject: [PATCH 09/21] update indexing tests with new behaviour to get them
 passing

---
 pandas/core/internals/array_manager.py        |   1 +
 pandas/tests/indexes/period/test_period.py    |   8 +-
 .../multiindex/test_chaining_and_caching.py   |   1 +
 .../tests/indexing/multiindex/test_setitem.py |  38 ++++--
 .../indexing/test_chaining_and_caching.py     | 126 ++++++++++++------
 pandas/tests/indexing/test_iloc.py            |  25 +++-
 pandas/tests/indexing/test_loc.py             |  25 ++--
 pandas/tests/indexing/test_scalar.py          |  10 +-
 .../series/accessors/test_dt_accessor.py      |  13 +-
 pandas/tests/series/indexing/test_indexing.py |   9 +-
 pandas/tests/series/methods/test_copy.py      |  32 +++--
 11 files changed, 208 insertions(+), 80 deletions(-)

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 59eeb04c7f644..2330788c997f5 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -378,6 +378,7 @@ def where(self: T, other, cond, align: bool, errors: str) -> T:
     #     return self.apply_with_block("setitem", indexer=indexer, value=value)
 
     def putmask(self, mask, new, align: bool = True):
+        # TODO(CoW) check references to copy if needed
         if align:
             align_keys = ["new", "mask"]
         else:
diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py
index 83c82c18f3d1e..519d21c044011 100644
--- a/pandas/tests/indexes/period/test_period.py
+++ b/pandas/tests/indexes/period/test_period.py
@@ -255,16 +255,20 @@ def test_is_(self):
         assert not index.is_(index - 2)
         assert not index.is_(index - 0)
 
-    def test_index_duplicate_periods(self):
+    def test_index_duplicate_periods(self, using_array_manager):
         # monotonic
         idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN")
         ts = Series(np.random.randn(len(idx)), index=idx)
+        original = ts.copy()
 
         result = ts["2007"]
         expected = ts[1:3]
         tm.assert_series_equal(result, expected)
         result[:] = 1
-        assert (ts[1:3] == 1).all()
+        if using_array_manager:
+            tm.assert_series_equal(ts, original)
+        else:
+            assert (ts[1:3] == 1).all()
 
         # not monotonic
         idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN")
diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py
index 6ccd44e698a8a..d4fe88c835ee1 100644
--- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py
+++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py
@@ -12,6 +12,7 @@
 import pandas.core.common as com
 
 
+@td.skip_array_manager_not_yet_implemented
 def test_detect_chained_assignment():
     # Inplace ops, originally from:
     # https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py
index 5d0aeba4aebbc..c10bb6af195d2 100644
--- a/pandas/tests/indexing/multiindex/test_setitem.py
+++ b/pandas/tests/indexing/multiindex/test_setitem.py
@@ -390,16 +390,24 @@ def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
         reindexed = dft.reindex(columns=[("foo", "two")])
         tm.assert_series_equal(reindexed["foo", "two"], s > s.median())
 
-    def test_set_column_scalar_with_loc(self, multiindex_dataframe_random_data):
+    def test_set_column_scalar_with_loc(
+        self, multiindex_dataframe_random_data, using_array_manager
+    ):
         frame = multiindex_dataframe_random_data
         subset = frame.index[[1, 4, 5]]
 
         frame.loc[subset] = 99
         assert (frame.loc[subset].values == 99).all()
 
+        frame_original = frame.copy()
+
         col = frame["B"]
         col[subset] = 97
-        assert (frame.loc[subset, "B"] == 97).all()
+        if using_array_manager:
+            # chained setitem doesn't work with CoW
+            tm.assert_frame_equal(frame, frame_original)
+        else:
+            assert (frame.loc[subset, "B"] == 97).all()
 
     def test_nonunique_assignment_1750(self):
         df = DataFrame(
@@ -472,21 +480,31 @@ def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
     assert (df["foo"].values == 0).all()
 
 
-def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
+def test_frame_setitem_copy_raises(
+    multiindex_dataframe_random_data, using_array_manager
+):
     # will raise/warn as its chained assignment
     df = multiindex_dataframe_random_data.T
-    msg = "A value is trying to be set on a copy of a slice from a DataFrame"
-    with pytest.raises(com.SettingWithCopyError, match=msg):
+    if using_array_manager:
+        # TODO(CoW) it would be nice if this could still warn/raise
         df["foo"]["one"] = 2
+    else:
+        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
+        with pytest.raises(com.SettingWithCopyError, match=msg):
+            df["foo"]["one"] = 2
 
 
-def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data):
+def test_frame_setitem_copy_no_write(
+    multiindex_dataframe_random_data, using_array_manager
+):
     frame = multiindex_dataframe_random_data.T
     expected = frame
     df = frame.copy()
-    msg = "A value is trying to be set on a copy of a slice from a DataFrame"
-    with pytest.raises(com.SettingWithCopyError, match=msg):
+    if using_array_manager:
         df["foo"]["one"] = 2
+    else:
+        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
+        with pytest.raises(com.SettingWithCopyError, match=msg):
+            df["foo"]["one"] = 2
 
-    result = df
-    tm.assert_frame_equal(result, expected)
+    tm.assert_frame_equal(df, expected)
diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py
index a38c652953fab..8780ba2b7c194 100644
--- a/pandas/tests/indexing/test_chaining_and_caching.py
+++ b/pandas/tests/indexing/test_chaining_and_caching.py
@@ -31,7 +31,7 @@ def random_text(nobs=100):
 
 
 class TestCaching:
-    def test_slice_consolidate_invalidate_item_cache(self):
+    def test_slice_consolidate_invalidate_item_cache(self, using_array_manager):
 
         # this is chained assignment, but will 'work'
         with option_context("chained_assignment", None):
@@ -51,7 +51,11 @@ def test_slice_consolidate_invalidate_item_cache(self):
             # Assignment to wrong series
             df["bb"].iloc[0] = 0.17
             df._clear_item_cache()
-            tm.assert_almost_equal(df["bb"][0], 0.17)
+            if not using_array_manager:
+                tm.assert_almost_equal(df["bb"][0], 0.17)
+            else:
+                # with ArrayManager, parent is not mutated with chained assignment
+                tm.assert_almost_equal(df["bb"][0], 2.2)
 
     @pytest.mark.parametrize("do_ref", [True, False])
     def test_setitem_cache_updating(self, do_ref):
@@ -70,7 +74,7 @@ def test_setitem_cache_updating(self, do_ref):
         assert df.loc[0, "c"] == 0.0
         assert df.loc[7, "c"] == 1.0
 
-    def test_setitem_cache_updating_slices(self):
+    def test_setitem_cache_updating_slices(self, using_array_manager):
         # GH 7084
         # not updating cache on series setting with slices
         expected = DataFrame(
@@ -91,12 +95,17 @@ def test_setitem_cache_updating_slices(self):
         # try via a chain indexing
         # this actually works
         out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
+        out_original = out.copy()
         for ix, row in df.iterrows():
             v = out[row["C"]][six:eix] + row["D"]
             out[row["C"]][six:eix] = v
 
-        tm.assert_frame_equal(out, expected)
-        tm.assert_series_equal(out["A"], expected["A"])
+        if not using_array_manager:
+            tm.assert_frame_equal(out, expected)
+            tm.assert_series_equal(out["A"], expected["A"])
+        else:
+            tm.assert_frame_equal(out, out_original)
+            tm.assert_series_equal(out["A"], out_original["A"])
 
         out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
         for ix, row in df.iterrows():
@@ -122,6 +131,8 @@ def test_altering_series_clears_parent_cache(self):
 
 
 class TestChaining:
+    # TODO(CoW) fix Series setitem with mask
+    @td.skip_array_manager_not_yet_implemented
     def test_setitem_chained_setfault(self):
 
         # GH6026
@@ -157,18 +168,22 @@ def test_setitem_chained_setfault(self):
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.arm_slow
-    def test_detect_chained_assignment(self):
+    def test_detect_chained_assignment(self, using_array_manager):
 
         pd.set_option("chained_assignment", "raise")
 
         # work with the chain
         expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB"))
         df = DataFrame(np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64")
+        df_original = df.copy()
         assert df._is_copy is None
 
         df["A"][0] = -5
         df["A"][1] = -6
-        tm.assert_frame_equal(df, expected)
+        if using_array_manager:
+            tm.assert_frame_equal(df, df_original)
+        else:
+            tm.assert_frame_equal(df, expected)
 
     @pytest.mark.arm_slow
     def test_detect_chained_assignment_raises(self, using_array_manager):
@@ -180,6 +195,7 @@ def test_detect_chained_assignment_raises(self, using_array_manager):
                 "B": np.array(np.arange(2, 4), dtype=np.float64),
             }
         )
+        df_original = df.copy()
         assert df._is_copy is None
 
         if not using_array_manager:
@@ -196,12 +212,10 @@ def test_detect_chained_assignment_raises(self, using_array_manager):
             # a mixed dataframe
             df["A"][0] = -5
             df["A"][1] = -6
-            expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB"))
-            expected["B"] = expected["B"].astype("float64")
-            tm.assert_frame_equal(df, expected)
+            tm.assert_frame_equal(df, df_original)
 
     @pytest.mark.arm_slow
-    def test_detect_chained_assignment_fails(self):
+    def test_detect_chained_assignment_fails(self, using_array_manager):
 
         # Using a copy (the chain), fails
         df = DataFrame(
@@ -211,11 +225,15 @@ def test_detect_chained_assignment_fails(self):
             }
         )
 
-        with pytest.raises(com.SettingWithCopyError, match=msg):
+        if using_array_manager:
+            # TODO(CoW) can we still warn here?
             df.loc[0]["A"] = -5
+        else:
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                df.loc[0]["A"] = -5
 
     @pytest.mark.arm_slow
-    def test_detect_chained_assignment_doc_example(self):
+    def test_detect_chained_assignment_doc_example(self, using_array_manager):
 
         # Doc example
         df = DataFrame(
@@ -226,30 +244,36 @@ def test_detect_chained_assignment_doc_example(self):
         )
         assert df._is_copy is None
 
-        with pytest.raises(com.SettingWithCopyError, match=msg):
+        if using_array_manager:
+            # TODO(CoW) can we still warn here?
             indexer = df.a.str.startswith("o")
             df[indexer]["c"] = 42
+        else:
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                indexer = df.a.str.startswith("o")
+                df[indexer]["c"] = 42
 
     @pytest.mark.arm_slow
     def test_detect_chained_assignment_object_dtype(self, using_array_manager):
 
         expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
         df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
-
-        with pytest.raises(com.SettingWithCopyError, match=msg):
-            df.loc[0]["A"] = 111
+        df_original = df.copy()
 
         if not using_array_manager:
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                df.loc[0]["A"] = 111
+
             with pytest.raises(com.SettingWithCopyError, match=msg):
                 df["A"][0] = 111
 
             df.loc[0, "A"] = 111
+            tm.assert_frame_equal(df, expected)
         else:
-            # INFO(ArrayManager) for ArrayManager it doesn't matter that it's
-            # a mixed dataframe
+            # TODO(CoW) can we still warn here?
             df["A"][0] = 111
-
-        tm.assert_frame_equal(df, expected)
+            df.loc[0]["A"] = 111
+            tm.assert_frame_equal(df, df_original)
 
     @pytest.mark.arm_slow
     def test_detect_chained_assignment_is_copy_pickle(self):
@@ -297,6 +321,7 @@ def test_detect_chained_assignment_implicit_take(self):
         df["letters"] = df["letters"].apply(str.lower)
 
     @pytest.mark.arm_slow
+    @td.skip_array_manager_invalid_test  # _is_copy is not always set for AM
     def test_detect_chained_assignment_implicit_take2(self):
 
         # Implicitly take 2
@@ -354,15 +379,21 @@ def test_detect_chained_assignment_false_positives(self):
         str(df)
 
     @pytest.mark.arm_slow
-    def test_detect_chained_assignment_undefined_column(self):
+    def test_detect_chained_assignment_undefined_column(self, using_array_manager):
 
         # from SO:
         # https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
         df = DataFrame(np.arange(0, 9), columns=["count"])
         df["group"] = "b"
+        df_original = df.copy()
 
-        with pytest.raises(com.SettingWithCopyError, match=msg):
+        if using_array_manager:
+            # TODO(CoW) can we still warn here?
             df.iloc[0:5]["group"] = "a"
+            tm.assert_frame_equal(df, df_original)
+        else:
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                df.iloc[0:5]["group"] = "a"
 
     @pytest.mark.arm_slow
     def test_detect_chained_assignment_changing_dtype(self, using_array_manager):
@@ -376,32 +407,40 @@ def test_detect_chained_assignment_changing_dtype(self, using_array_manager):
                 "D": ["a", "b", "c", "d", "e"],
             }
         )
+        df_original = df.copy()
 
-        with pytest.raises(com.SettingWithCopyError, match=msg):
-            df.loc[2]["D"] = "foo"
+        if not using_array_manager:
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                df.loc[2]["D"] = "foo"
 
-        with pytest.raises(com.SettingWithCopyError, match=msg):
-            df.loc[2]["C"] = "foo"
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                df.loc[2]["C"] = "foo"
 
-        if not using_array_manager:
             with pytest.raises(com.SettingWithCopyError, match=msg):
                 df["C"][2] = "foo"
         else:
-            # INFO(ArrayManager) for ArrayManager it doesn't matter if it's
-            # changing the dtype or not
+            # breakpoint()
+            # TODO(CoW) can we still warn here?
+            # df.loc[2]["D"] = "foo"
+            # df.loc[2]["C"] = "foo"
             df["C"][2] = "foo"
-            assert df.loc[2, "C"] == "foo"
+            tm.assert_frame_equal(df, df_original)
 
-    def test_setting_with_copy_bug(self):
+    def test_setting_with_copy_bug(self, using_array_manager):
 
         # operating on a copy
         df = DataFrame(
             {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]}
         )
+        df_original = df.copy()
         mask = pd.isna(df.c)
 
-        with pytest.raises(com.SettingWithCopyError, match=msg):
+        if not using_array_manager:
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                df[["c"]][mask] = df[["b"]][mask]
+        else:
             df[["c"]][mask] = df[["b"]][mask]
+            tm.assert_frame_equal(df, df_original)
 
     def test_setting_with_copy_bug_no_warning(self):
         # invalid warning as we are returning a new object
@@ -412,6 +451,7 @@ def test_setting_with_copy_bug_no_warning(self):
         # this should not raise
         df2["y"] = ["g", "h", "i"]
 
+    @td.skip_array_manager_not_yet_implemented  # TODO(CoW) can we still warn/raise?
     def test_detect_chained_assignment_warnings_errors(self):
         df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
         with option_context("chained_assignment", "warn"):
@@ -422,18 +462,25 @@ def test_detect_chained_assignment_warnings_errors(self):
             with pytest.raises(com.SettingWithCopyError, match=msg):
                 df.loc[0]["A"] = 111
 
-    def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
+    def test_detect_chained_assignment_warnings_filter_and_dupe_cols(
+        self, using_array_manager
+    ):
         # xref gh-13017.
         with option_context("chained_assignment", "warn"):
             df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"])
+            df_original = df.copy()
 
-            with tm.assert_produces_warning(com.SettingWithCopyWarning):
+            warn = None if using_array_manager else com.SettingWithCopyWarning
+            with tm.assert_produces_warning(warn):
                 df.c.loc[df.c > 0] = None
 
             expected = DataFrame(
                 [[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"]
             )
-            tm.assert_frame_equal(df, expected)
+            if using_array_manager:
+                tm.assert_frame_equal(df, df_original)
+            else:
+                tm.assert_frame_equal(df, expected)
 
     # TODO(ArrayManager) fast_xs with array-like scalars is not yet working
     @td.skip_array_manager_not_yet_implemented
@@ -484,7 +531,7 @@ def test_cache_updating2(self):
         expected = Series([0, 0, 0, 2, 0], name="f")
         tm.assert_series_equal(df.f, expected)
 
-    def test_iloc_setitem_chained_assignment(self):
+    def test_iloc_setitem_chained_assignment(self, using_array_manager):
         # GH#3970
         with option_context("chained_assignment", None):
             df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
@@ -498,7 +545,10 @@ def test_iloc_setitem_chained_assignment(self):
             df_tmp = df.iloc[ck]  # noqa
 
             df["bb"].iloc[0] = 0.15
-            assert df["bb"].iloc[0] == 0.15
+            if not using_array_manager:
+                assert df["bb"].iloc[0] == 0.15
+            else:
+                assert df["bb"].iloc[0] == 2.2
 
     def test_getitem_loc_assignment_slice_state(self):
         # GH 13569
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index b04a2c86a79d7..987824a735607 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -80,10 +80,13 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
         indexer(df)[key, 0] = cat
 
         overwrite = isinstance(key, slice) and key == slice(None)
+        if using_array_manager and indexer == tm.loc:
+            # TODO(ArrayManager) with loc, slice(3) gets converted into slice(0, 3)
+            # which is then considered as "full" slice and does overwrite. For iloc
+            # this conversion is not done, and so it doesn't overwrite
+            overwrite = overwrite or (isinstance(key, slice) and key == slice(3))
 
-        if overwrite or using_array_manager:
-            # TODO(ArrayManager) we always overwrite because ArrayManager takes
-            #  the "split" path, which still overwrites
+        if overwrite:
             # TODO: GH#39986 this probably shouldn't behave differently
             expected = DataFrame({0: cat})
             assert not np.shares_memory(df.values, orig_vals)
@@ -107,6 +110,8 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
         orig_vals = df.values
         indexer(df)[key, 0] = cat
         expected = DataFrame({0: cat, 1: range(3)})
+        if using_array_manager and not overwrite:
+            expected[0] = expected[0].astype(object)
         tm.assert_frame_equal(df, expected)
 
     # TODO(ArrayManager) does not yet update parent
@@ -847,7 +852,11 @@ def test_identity_slice_returns_new_object(self, using_array_manager):
 
         # should also be a shallow copy
         original_series[:3] = [7, 8, 9]
-        assert all(sliced_series[:3] == [7, 8, 9])
+        if using_array_manager:
+            # shallow copy not updated (CoW)
+            assert all(sliced_series[:3] == [1, 2, 3])
+        else:
+            assert all(sliced_series[:3] == [7, 8, 9])
 
     def test_indexing_zerodim_np_array(self):
         # GH24919
@@ -1308,8 +1317,9 @@ def test_frame_iloc_setitem_callable(self):
 
 
 class TestILocSeries:
-    def test_iloc(self):
+    def test_iloc(self, using_array_manager):
         ser = Series(np.random.randn(10), index=list(range(0, 20, 2)))
+        ser_original = ser.copy()
 
         for i in range(len(ser)):
             result = ser.iloc[i]
@@ -1323,7 +1333,10 @@ def test_iloc(self):
 
         # test slice is a view
         result[:] = 0
-        assert (ser[1:3] == 0).all()
+        if using_array_manager:
+            tm.assert_series_equal(ser, ser_original)
+        else:
+            assert (ser[1:3] == 0).all()
 
         # list of integers
         result = ser.iloc[[0, 2, 3, 4, 5]]
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index e96b25418d408..cf818be9bce26 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -600,29 +600,33 @@ def test_loc_setitem_frame_with_reindex(self, using_array_manager):
         # setting integer values into a float dataframe with loc is inplace,
         #  so we retain float dtype
         ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float)
-        if using_array_manager:
-            # TODO(ArrayManager) with "split" path, we still overwrite the column
-            # and therefore don't take the dtype of the underlying object into account
-            ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64")
         expected = DataFrame({"A": ser})
         tm.assert_frame_equal(df, expected)
 
-    def test_loc_setitem_frame_with_reindex_mixed(self):
+    def test_loc_setitem_frame_with_reindex_mixed(self, using_array_manager):
         # GH#40480
         df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float)
         df["B"] = "string"
         df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64")
-        ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64")
+        ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float)
+        if not using_array_manager:
+            # when using BlockManager, this takes the "split" path, which
+            # still overwrites the column
+            ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64")
         expected = DataFrame({"A": ser})
         expected["B"] = "string"
         tm.assert_frame_equal(df, expected)
 
-    def test_loc_setitem_frame_with_inverted_slice(self):
+    def test_loc_setitem_frame_with_inverted_slice(self, using_array_manager):
         # GH#40480
         df = DataFrame(index=[1, 2, 3], columns=["A", "B"], dtype=float)
         df["B"] = "string"
         df.loc[slice(3, 0, -1), "A"] = np.array([1, 2, 3], dtype="int64")
-        expected = DataFrame({"A": [3, 2, 1], "B": "string"}, index=[1, 2, 3])
+        expected = DataFrame({"A": [3.0, 2.0, 1.0], "B": "string"}, index=[1, 2, 3])
+        if not using_array_manager:
+            # when using BlockManager, this takes the "split" path, which
+            # still overwrites the column
+            expected["A"] = expected["A"].astype("int64")
         tm.assert_frame_equal(df, expected)
 
     # TODO(ArrayManager) "split" path overwrites column and therefore don't take
@@ -988,7 +992,10 @@ def test_identity_slice_returns_new_object(self, using_array_manager):
         assert original_series[:] is not original_series
 
         original_series[:3] = [7, 8, 9]
-        assert all(sliced_series[:3] == [7, 8, 9])
+        if using_array_manager:
+            assert all(sliced_series[:3] == [1, 2, 3])
+        else:
+            assert all(sliced_series[:3] == [7, 8, 9])
 
     @pytest.mark.xfail(reason="accidental fix reverted - GH37497")
     def test_loc_copy_vs_view(self):
diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py
index 39611bce2b4fa..1f99947d77d36 100644
--- a/pandas/tests/indexing/test_scalar.py
+++ b/pandas/tests/indexing/test_scalar.py
@@ -7,6 +7,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas import (
     DataFrame,
     Series,
@@ -46,7 +48,11 @@ def _check(f, func, values=False):
         for f in [d["ints"], d["uints"], d["labels"], d["ts"], d["floats"]]:
             _check(f, "at")
 
-    @pytest.mark.parametrize("kind", ["series", "frame"])
+    # TODO(CoW) fix at/iat setting on a DataFrame
+    @pytest.mark.parametrize(
+        "kind",
+        ["series", pytest.param("frame", marks=td.skip_array_manager_invalid_test)],
+    )
     def test_at_and_iat_set(self, kind):
         def _check(f, func, values=False):
 
@@ -207,6 +213,8 @@ def test_mixed_index_at_iat_loc_iloc_dataframe(self):
         with pytest.raises(KeyError, match="^3$"):
             df.loc[0, 3]
 
+    # TODO(CoW) fix at/iat setting on a DataFrame
+    @td.skip_array_manager_invalid_test
     def test_iat_setter_incompatible_assignment(self):
         # GH 23236
         result = DataFrame({"a": [0, 1], "b": [4, 5]})
diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py
index 61a22dad5d09b..2ee51c9e33566 100644
--- a/pandas/tests/series/accessors/test_dt_accessor.py
+++ b/pandas/tests/series/accessors/test_dt_accessor.py
@@ -41,7 +41,7 @@
 
 
 class TestSeriesDatetimeValues:
-    def test_dt_namespace_accessor(self):
+    def test_dt_namespace_accessor(self, using_array_manager):
 
         # GH 7207, 11128
         # test .dt namespace accessor
@@ -258,10 +258,15 @@ def get_dir(s):
             s.dt.hour = 5
 
         # trying to set a copy
-        msg = "modifications to a property of a datetimelike.+not supported"
-        with pd.option_context("chained_assignment", "raise"):
-            with pytest.raises(com.SettingWithCopyError, match=msg):
+        if using_array_manager:
+            # TODO(CoW) it would be nice to keep a warning/error for this case
+            with pd.option_context("chained_assignment", "raise"):
                 s.dt.hour[0] = 5
+        else:
+            msg = "modifications to a property of a datetimelike.+not supported"
+            with pd.option_context("chained_assignment", "raise"):
+                with pytest.raises(com.SettingWithCopyError, match=msg):
+                    s.dt.hour[0] = 5
 
     @pytest.mark.parametrize(
         "method, dates",
diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py
index 6c3587c7eeada..c0a52e642dce4 100644
--- a/pandas/tests/series/indexing/test_indexing.py
+++ b/pandas/tests/series/indexing/test_indexing.py
@@ -204,7 +204,8 @@ def test_basic_getitem_setitem_corner(datetime_series):
         datetime_series[[5, slice(None, None)]] = 2
 
 
-def test_slice(string_series, object_series):
+def test_slice(string_series, object_series, using_array_manager):
+    original = string_series.copy()
     numSlice = string_series[10:20]
     numSliceEnd = string_series[-10:]
     objSlice = object_series[10:20]
@@ -222,7 +223,11 @@ def test_slice(string_series, object_series):
     sl = string_series[10:20]
     sl[:] = 0
 
-    assert (string_series[10:20] == 0).all()
+    if using_array_manager:
+        # Doesn't modify parent (CoW)
+        tm.assert_series_equal(string_series, original)
+    else:
+        assert (string_series[10:20] == 0).all()
 
 
 def test_timedelta_assignment():
diff --git a/pandas/tests/series/methods/test_copy.py b/pandas/tests/series/methods/test_copy.py
index 8aa5c14812dc0..26966592d563b 100644
--- a/pandas/tests/series/methods/test_copy.py
+++ b/pandas/tests/series/methods/test_copy.py
@@ -9,20 +9,28 @@
 
 
 class TestCopy:
-    @pytest.mark.parametrize("deep", [None, False, True])
-    def test_copy(self, deep):
+    @pytest.mark.parametrize("deep", ["default", None, False, True])
+    def test_copy(self, deep, using_array_manager):
 
         ser = Series(np.arange(10), dtype="float64")
 
         # default deep is True
-        if deep is None:
+        if deep == "default":
             ser2 = ser.copy()
         else:
             ser2 = ser.copy(deep=deep)
 
+        if using_array_manager:
+            # INFO(ArrayManager) a shallow copy doesn't yet copy the data
+            # but parent will not be modified (CoW)
+            if deep is None or deep is False:
+                assert np.may_share_memory(ser.values, ser2.values)
+            else:
+                assert not np.may_share_memory(ser.values, ser2.values)
+
         ser2[::2] = np.NaN
 
-        if deep is None or deep is True:
+        if deep == "default" or deep is None or deep is True or using_array_manager:
             # Did not modify original Series
             assert np.isnan(ser2[0])
             assert not np.isnan(ser[0])
@@ -31,8 +39,8 @@ def test_copy(self, deep):
             assert np.isnan(ser2[0])
             assert np.isnan(ser[0])
 
-    @pytest.mark.parametrize("deep", [None, False, True])
-    def test_copy_tzaware(self, deep):
+    @pytest.mark.parametrize("deep", ["default", None, False, True])
+    def test_copy_tzaware(self, deep, using_array_manager):
         # GH#11794
         # copy of tz-aware
         expected = Series([Timestamp("2012/01/01", tz="UTC")])
@@ -40,15 +48,23 @@ def test_copy_tzaware(self, deep):
 
         ser = Series([Timestamp("2012/01/01", tz="UTC")])
 
-        if deep is None:
+        if deep == "default":
             ser2 = ser.copy()
         else:
             ser2 = ser.copy(deep=deep)
 
+        if using_array_manager:
+            # INFO(ArrayManager) a shallow copy doesn't yet copy the data
+            # but parent will not be modified (CoW)
+            if deep is None or deep is False:
+                assert np.may_share_memory(ser.values, ser2.values)
+            else:
+                assert not np.may_share_memory(ser.values, ser2.values)
+
         ser2[0] = Timestamp("1999/01/01", tz="UTC")
 
         # default deep is True
-        if deep is None or deep is True:
+        if deep == "default" or deep is None or deep is True or using_array_manager:
             # Did not modify original Series
             tm.assert_series_equal(ser2, expected2)
             tm.assert_series_equal(ser, expected)

From 67413409a5dd06cacaa242b8926f5f5ad8f5ef10 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 4 Aug 2021 14:33:54 +0200
Subject: [PATCH 10/21] fix new test + skip Series.update tests for now

---
 pandas/core/frame.py                            |  8 ++++++--
 pandas/tests/frame/methods/test_update.py       |  4 ++++
 .../tests/indexing/test_chaining_and_caching.py | 17 +++++++++++++----
 pandas/tests/indexing/test_partial.py           |  8 +++++++-
 pandas/tests/series/indexing/test_indexing.py   |  4 ++++
 pandas/tests/series/methods/test_update.py      |  3 +++
 6 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6038994b55378..0729039fc2e17 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3801,8 +3801,12 @@ def _set_value(
         """
         try:
             if takeable:
-                series = self._ixs(col, axis=1)
-                series._set_value(index, value, takeable=True)
+                if isinstance(self._mgr, ArrayManager):
+                    # with CoW, we can't use intermediate series
+                    self._mgr.column_setitem(col, index, value)
+                else:
+                    series = self._ixs(col, axis=1)
+                    series._set_value(index, value, takeable=True)
                 return
 
             series = self._get_item_cache(col)
diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py
index 408113e9bc417..add190de73995 100644
--- a/pandas/tests/frame/methods/test_update.py
+++ b/pandas/tests/frame/methods/test_update.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -138,6 +140,8 @@ def test_update_datetime_tz(self):
         expected = DataFrame([pd.Timestamp("2019", tz="UTC")])
         tm.assert_frame_equal(result, expected)
 
+    # TODO(CoW) what should the update method do? -> deprecate this?
+    @td.skip_array_manager_not_yet_implemented
     def test_update_with_different_dtype(self):
         # GH#3217
         df = DataFrame({"a": [1, 3], "b": [np.nan, 2]})
diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py
index fc82be4325b3d..8eba27a907280 100644
--- a/pandas/tests/indexing/test_chaining_and_caching.py
+++ b/pandas/tests/indexing/test_chaining_and_caching.py
@@ -483,14 +483,23 @@ def test_detect_chained_assignment_warnings_filter_and_dupe_cols(
                 tm.assert_frame_equal(df, expected)
 
     @pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})])
-    def test_detect_chained_assignment_warning_stacklevel(self, rhs):
+    def test_detect_chained_assignment_warning_stacklevel(
+        self, rhs, using_array_manager
+    ):
         # GH#42570
         df = DataFrame(np.arange(25).reshape(5, 5))
+        df_original = df.copy()
         chained = df.loc[:3]
         with option_context("chained_assignment", "warn"):
-            with tm.assert_produces_warning(com.SettingWithCopyWarning) as t:
-                chained[2] = rhs
-                assert t[0].filename == __file__
+            if not using_array_manager:
+                with tm.assert_produces_warning(com.SettingWithCopyWarning) as t:
+                    chained[2] = rhs
+                    assert t[0].filename == __file__
+            else:
+                # INFO(CoW) no warning, and original dataframe not changed
+                with tm.assert_produces_warning(None):
+                    chained[2] = rhs
+                tm.assert_frame_equal(df, df_original)
 
     # TODO(ArrayManager) fast_xs with array-like scalars is not yet working
     @td.skip_array_manager_not_yet_implemented
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index 693e67652c912..dd28baa9a45f9 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -74,7 +74,13 @@ def test_partial_setting(self):
         with pytest.raises(IndexError, match=msg):
             df.iloc[4, 2] = 5.0
 
-        msg = "index 2 is out of bounds for axis 0 with size 2"
+        msg = "|".join(
+            [
+                "index 2 is out of bounds for axis 0 with size 2",
+                # TODO(ArrayManager) improve error message
+                "list index out of range",
+            ]
+        )
         with pytest.raises(IndexError, match=msg):
             df.iat[4, 2] = 5.0
 
diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py
index c0a52e642dce4..5c6030d52af5c 100644
--- a/pandas/tests/series/indexing/test_indexing.py
+++ b/pandas/tests/series/indexing/test_indexing.py
@@ -5,6 +5,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas import (
     DataFrame,
     IndexSlice,
@@ -244,6 +246,8 @@ def test_timedelta_assignment():
     tm.assert_series_equal(s, expected)
 
 
+# TODO(CoW) what should the update method do? -> deprecate this?
+@td.skip_array_manager_not_yet_implemented
 def test_underlying_data_conversion():
     # GH 4080
     df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]})
diff --git a/pandas/tests/series/methods/test_update.py b/pandas/tests/series/methods/test_update.py
index d9d6641d54237..17d05711ffefe 100644
--- a/pandas/tests/series/methods/test_update.py
+++ b/pandas/tests/series/methods/test_update.py
@@ -14,6 +14,9 @@
 
 
 class TestUpdate:
+
+    # TODO(CoW) what should the update method do? -> deprecate this?
+    @td.skip_array_manager_not_yet_implemented
     def test_update(self):
         s = Series([1.5, np.nan, 3.0, 4.0, np.nan])
         s2 = Series([np.nan, 3.5, np.nan, 5.0])

From c4527e9d32673525306d906bc97ddbb8289093a2 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 4 Aug 2021 14:49:24 +0200
Subject: [PATCH 11/21] remove chained assignment outside indexing tests

---
 pandas/tests/frame/methods/test_dropna.py     |  2 +-
 .../tests/frame/methods/test_interpolate.py   | 28 ++++++++++---------
 pandas/tests/frame/methods/test_isin.py       |  6 ++--
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py
index 76a6f3aa25362..bc2b48d3312d7 100644
--- a/pandas/tests/frame/methods/test_dropna.py
+++ b/pandas/tests/frame/methods/test_dropna.py
@@ -66,7 +66,7 @@ def test_dropIncompleteRows(self, float_frame):
 
     def test_dropna(self):
         df = DataFrame(np.random.randn(6, 4))
-        df[2][:2] = np.nan
+        df.iloc[:2, 2] = np.nan
 
         dropped = df.dropna(axis=1)
         expected = df.loc[:, [0, 1, 3]]
diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py
index d0551ffd5cffe..45129f7c0b366 100644
--- a/pandas/tests/frame/methods/test_interpolate.py
+++ b/pandas/tests/frame/methods/test_interpolate.py
@@ -102,34 +102,34 @@ def test_interp_various(self):
         expected = df.copy()
         result = df.interpolate(method="polynomial", order=1)
 
-        expected.A.loc[3] = 2.66666667
-        expected.A.loc[13] = 5.76923076
+        expected.loc[3, "A"] = 2.66666667
+        expected.loc[13, "A"] = 5.76923076
         tm.assert_frame_equal(result, expected)
 
         result = df.interpolate(method="cubic")
         # GH #15662.
-        expected.A.loc[3] = 2.81547781
-        expected.A.loc[13] = 5.52964175
+        expected.loc[3, "A"] = 2.81547781
+        expected.loc[13, "A"] = 5.52964175
         tm.assert_frame_equal(result, expected)
 
         result = df.interpolate(method="nearest")
-        expected.A.loc[3] = 2
-        expected.A.loc[13] = 5
+        expected.loc[3, "A"] = 2
+        expected.loc[13, "A"] = 5
         tm.assert_frame_equal(result, expected, check_dtype=False)
 
         result = df.interpolate(method="quadratic")
-        expected.A.loc[3] = 2.82150771
-        expected.A.loc[13] = 6.12648668
+        expected.loc[3, "A"] = 2.82150771
+        expected.loc[13, "A"] = 6.12648668
         tm.assert_frame_equal(result, expected)
 
         result = df.interpolate(method="slinear")
-        expected.A.loc[3] = 2.66666667
-        expected.A.loc[13] = 5.76923077
+        expected.loc[3, "A"] = 2.66666667
+        expected.loc[13, "A"] = 5.76923077
         tm.assert_frame_equal(result, expected)
 
         result = df.interpolate(method="zero")
-        expected.A.loc[3] = 2.0
-        expected.A.loc[13] = 5
+        expected.loc[3, "A"] = 2.0
+        expected.loc[13, "A"] = 5
         tm.assert_frame_equal(result, expected, check_dtype=False)
 
     @td.skip_if_no_scipy
@@ -218,7 +218,7 @@ def test_interp_leading_nans(self, check_scipy):
         )
         result = df.interpolate()
         expected = df.copy()
-        expected["B"].loc[3] = -3.75
+        expected.loc[3, "B"] = -3.75
         tm.assert_frame_equal(result, expected)
 
         if check_scipy:
@@ -254,6 +254,8 @@ def test_interp_raise_on_all_object_dtype(self):
         with pytest.raises(TypeError, match=msg):
             df.interpolate()
 
+    # TODO(CoW) inplace method on Series -> OK with not updating parent?
+    @td.skip_array_manager_not_yet_implemented
     def test_interp_inplace(self):
         df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]})
         expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]})
diff --git a/pandas/tests/frame/methods/test_isin.py b/pandas/tests/frame/methods/test_isin.py
index d2ebd09c4cc48..e924963f588f3 100644
--- a/pandas/tests/frame/methods/test_isin.py
+++ b/pandas/tests/frame/methods/test_isin.py
@@ -79,8 +79,8 @@ def test_isin_df(self):
         df2 = DataFrame({"A": [0, 2, 12, 4], "B": [2, np.nan, 4, 5]})
         expected = DataFrame(False, df1.index, df1.columns)
         result = df1.isin(df2)
-        expected["A"].loc[[1, 3]] = True
-        expected["B"].loc[[0, 2]] = True
+        expected.loc[[1, 3], "A"] = True
+        expected.loc[[0, 2], "B"] = True
         tm.assert_frame_equal(result, expected)
 
         # partial overlapping columns
@@ -133,7 +133,7 @@ def test_isin_against_series(self):
         )
         s = Series([1, 3, 11, 4], index=["a", "b", "c", "d"])
         expected = DataFrame(False, index=df.index, columns=df.columns)
-        expected["A"].loc["a"] = True
+        expected.loc["a", "A"] = True
         expected.loc["d"] = True
         result = df.isin(s)
         tm.assert_frame_equal(result, expected)

From b33aaf2f4ae389760d38a930a8357aaae0b6771f Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 4 Aug 2021 15:06:01 +0200
Subject: [PATCH 12/21] fix DataFrame.fillna + more chained setitem

---
 pandas/core/generic.py                           | 6 ++++++
 pandas/tests/frame/methods/test_combine_first.py | 2 +-
 pandas/tests/frame/methods/test_cov_corr.py      | 6 ++++--
 pandas/tests/frame/test_arithmetic.py            | 3 ++-
 4 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index ab1796b797612..cc2a4a4802d84 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6310,6 +6310,12 @@ def fillna(
                     obj = result[k]
                     downcast_k = downcast if not is_dict else downcast.get(k)
                     obj.fillna(v, limit=limit, inplace=True, downcast=downcast_k)
+                    if isinstance(self._mgr, ArrayManager):
+                        # CoW -> we need to assign filled Series back to obj
+                        # TODO(CoW) we should avoid this here, with inplace=False
+                        # we already copied the original obj, in theory no need
+                        # for copying on write again
+                        result[k] = obj
                 return result if not inplace else None
 
             elif not is_list_like(value):
diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
index dd91b32c8eb8c..5e5256e1d18c2 100644
--- a/pandas/tests/frame/methods/test_combine_first.py
+++ b/pandas/tests/frame/methods/test_combine_first.py
@@ -66,7 +66,7 @@ def test_combine_first(self, float_frame):
         assert (combined["A"][:10] == 1).all()
 
         # reverse overlap
-        tail["A"][:10] = 0
+        tail.iloc[:10, tail.columns.get_loc("A")] = 0
         combined = tail.combine_first(head)
         assert (combined["A"][:10] == 0).all()
 
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index 352d95156bf98..d01288114caf7 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -27,8 +27,10 @@ def test_cov(self, float_frame, float_string_frame):
 
         # with NAs
         frame = float_frame.copy()
-        frame["A"][:5] = np.nan
-        frame["B"][5:10] = np.nan
+        # frame["A"][:5] = np.nan
+        frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
+        # frame["B"][5:10] = np.nan
+        frame.iloc[5:10, frame.columns.get_loc("B")] = np.nan
         result = frame.cov(min_periods=len(frame) - 8)
         expected = frame.cov()
         expected.loc["A", "B"] = np.nan
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index afa9593807acc..c5d6a1b0a20a2 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -1183,7 +1183,8 @@ def test_combineFrame(self, float_frame, mixed_float_frame, mixed_int_frame):
         frame_copy = float_frame.reindex(float_frame.index[::2])
 
         del frame_copy["D"]
-        frame_copy["C"][:5] = np.nan
+        # adding NAs to column "C"
+        frame_copy.iloc[:5, frame_copy.columns.get_loc("C")] = np.nan
 
         added = float_frame + frame_copy
 

From f5da03f0a918d73c9bc10a746fef141b12d838f7 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 8 Aug 2021 10:25:43 +0200
Subject: [PATCH 13/21] reindex/take on columns to use copy-on-write

---
 pandas/core/frame.py                        |  2 +-
 pandas/core/generic.py                      |  6 ++----
 pandas/core/internals/array_manager.py      | 10 ++++++++--
 pandas/core/internals/managers.py           |  4 ++++
 pandas/tests/indexing/test_copy_on_write.py | 20 ++++++++++++++++++++
 5 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 0729039fc2e17..4fc7f907480c5 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4726,7 +4726,7 @@ def set_axis(self, labels, axis: Axis = 0, inplace: bool = False):
         "labels",
         [
             ("method", None),
-            ("copy", True),
+            ("copy", None),
             ("level", None),
             ("fill_value", np.nan),
             ("limit", None),
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 536b395fa7a50..ab739d96bfa4a 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -4781,7 +4781,7 @@ def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries:
         axes, kwargs = self._construct_axes_from_arguments(args, kwargs)
         method = missing.clean_reindex_fill_method(kwargs.pop("method", None))
         level = kwargs.pop("level", None)
-        copy = kwargs.pop("copy", True)
+        copy = kwargs.pop("copy", None)
         limit = kwargs.pop("limit", None)
         tolerance = kwargs.pop("tolerance", None)
         fill_value = kwargs.pop("fill_value", None)
@@ -4806,9 +4806,7 @@ def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries:
             for axis, ax in axes.items()
             if ax is not None
         ):
-            if copy:
-                return self.copy()
-            return self
+            return self.copy(deep=copy)
 
         # check if we are a multi reindex
         if self._needs_reindex_multi(axes, method, level):
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index b532835cd0594..209dc6cacb5cc 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -612,6 +612,10 @@ def _reindex_indexer(
 
         pandas-indexer with -1's only.
         """
+        if copy is None:
+            # use shallow copy
+            copy = False
+
         if indexer is None:
             if new_axis is self._axes[axis] and not copy:
                 return self
@@ -640,9 +644,11 @@ def _reindex_indexer(
                 else:
                     # reusing full column array -> track with reference
                     arr = self.arrays[i]
-                    ref = weakref.ref(arr)
                     if copy:
                         arr = arr.copy()
+                        ref = None
+                    else:
+                        ref = weakref.ref(arr)
                 new_arrays.append(arr)
                 refs.append(ref)
 
@@ -692,7 +698,7 @@ def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T:
 
         new_labels = self._axes[axis].take(indexer)
         return self._reindex_indexer(
-            new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True
+            new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True, copy=None
         )
 
     def _make_na_array(self, fill_value=None, use_na_proxy=False):
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index cbf58c4b6c1d7..d33f5df684494 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -645,6 +645,10 @@ def reindex_indexer(
 
         pandas-indexer with -1's only.
         """
+        if copy is None:
+            # preserve deep copy for BlockManager with copy=None
+            copy = True
+
         if indexer is None:
             if new_axis is self.axes[axis] and not copy:
                 return self
diff --git a/pandas/tests/indexing/test_copy_on_write.py b/pandas/tests/indexing/test_copy_on_write.py
index 780e9d11b107c..9f149b88b0707 100644
--- a/pandas/tests/indexing/test_copy_on_write.py
+++ b/pandas/tests/indexing/test_copy_on_write.py
@@ -121,6 +121,26 @@ def test_rename_columns_modify_parent(using_array_manager):
     tm.assert_frame_equal(df2, df2_orig)
 
 
+def test_reindex_columns(using_array_manager):
+    # Case: reindexing the column returns a new dataframe
+    # + afterwards modifying the result
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
+    df_orig = df.copy()
+    df2 = df.reindex(columns=["a", "c"])
+
+    if using_array_manager:
+        # still shares memory (df2 is a shallow copy)
+        assert np.may_share_memory(df2["a"].values, df["a"].values)
+    else:
+        assert not np.may_share_memory(df2["a"].values, df["a"].values)
+    # mutating df2 triggers a copy-on-write for that column
+    df2.iloc[0, 0] = 0
+    assert not np.may_share_memory(df2["a"].values, df["a"].values)
+    if using_array_manager:
+        assert np.may_share_memory(df2["c"].values, df["c"].values)
+    tm.assert_frame_equal(df, df_orig)
+
+
 # -----------------------------------------------------------------------------
 # Indexing operations taking subset + modifying the subset/parent
 

From c63275750b14ad9aab3d41fdc0c5e3157a4d1bca Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 8 Aug 2021 11:08:23 +0200
Subject: [PATCH 14/21] fix some typing issues

---
 pandas/core/frame.py                   |  3 +++
 pandas/core/internals/array_manager.py | 21 ++++++++++++++++-----
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 4fc7f907480c5..65df053890620 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3803,6 +3803,9 @@ def _set_value(
             if takeable:
                 if isinstance(self._mgr, ArrayManager):
                     # with CoW, we can't use intermediate series
+                    # with takeable=True, we know that index is positional and
+                    # not a generic hashable label
+                    index = cast(int, index)
                     self._mgr.column_setitem(col, index, value)
                 else:
                     series = self._ixs(col, axis=1)
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 209dc6cacb5cc..d0e4f9a45e0f9 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -129,7 +129,7 @@ def __init__(
         self,
         arrays: list[np.ndarray | ExtensionArray],
         axes: list[Index],
-        refs: list[weakref.ref | None] | None,
+        refs: list[weakref.ref | None] | None = None,
         verify_integrity: bool = True,
     ):
         raise NotImplementedError
@@ -562,7 +562,12 @@ def copy_func(ax):
             new_arrays = list(self.arrays)
             refs = [weakref.ref(arr) for arr in self.arrays]
 
-        return type(self)(new_arrays, new_axes, refs, verify_integrity=False)
+        # error: Argument 3 to "BaseArrayManager" has incompatible type
+        # "Optional[List[ReferenceType[Union[ndarray[Any, Any], ExtensionArray]]]]";
+        # expected "Optional[List[Optional[ReferenceType[Any]]]]" [arg-type]
+        return type(self)(
+            new_arrays, new_axes, refs, verify_integrity=False  # type: ignore[arg-type]
+        )
 
     def reindex_indexer(
         self: T,
@@ -571,7 +576,7 @@ def reindex_indexer(
         axis: int,
         fill_value=None,
         allow_dups: bool = False,
-        copy: bool = True,
+        copy: bool | None = True,
         # ignored keywords
         consolidate: bool = True,
         only_slice: bool = False,
@@ -596,7 +601,7 @@ def _reindex_indexer(
         axis: int,
         fill_value=None,
         allow_dups: bool = False,
-        copy: bool = True,
+        copy: bool | None = True,
         use_na_proxy: bool = False,
     ) -> T:
         """
@@ -632,6 +637,7 @@ def _reindex_indexer(
         if axis >= self.ndim:
             raise IndexError("Requested axis not found in manager")
 
+        refs: list[weakref.ref | None] | None = None
         if axis == 1:
             new_arrays = []
             refs = []
@@ -830,7 +836,12 @@ def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager:
         new_axes = list(self._axes)
         new_axes[axis] = new_axes[axis]._getitem_slice(slobj)
 
-        return type(self)(arrays, new_axes, refs, verify_integrity=False)
+        # error: Argument 3 to "ArrayManager" has incompatible type
+        # "List[ReferenceType[Union[ndarray[Any, Any], ExtensionArray]]]";
+        # expected "Optional[List[Optional[ReferenceType[Any]]]]" [arg-type]
+        return type(self)(
+            arrays, new_axes, refs, verify_integrity=False  # type: ignore[arg-type]
+        )
 
     def iget(self, i: int) -> SingleArrayManager:
         """

From e4a5f332ef505cfde59ef6908ba105b464c8eb6f Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 8 Aug 2021 11:10:18 +0200
Subject: [PATCH 15/21] fix dtype in test for windows

---
 pandas/tests/indexing/test_copy_on_write.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/indexing/test_copy_on_write.py b/pandas/tests/indexing/test_copy_on_write.py
index 9f149b88b0707..9524353818143 100644
--- a/pandas/tests/indexing/test_copy_on_write.py
+++ b/pandas/tests/indexing/test_copy_on_write.py
@@ -292,7 +292,7 @@ def test_subset_set_column(using_array_manager):
     subset = df[1:3]
 
     if using_array_manager:
-        subset["a"] = np.array([10, 11])
+        subset["a"] = np.array([10, 11], dtype="int64")
     else:
         with pd.option_context("chained_assignment", "warn"):
             with tm.assert_produces_warning(com.SettingWithCopyWarning):

From 5efad508093c64252d9278fff075829b059ffed0 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sun, 8 Aug 2021 14:21:52 +0200
Subject: [PATCH 16/21] fix dtype in test for windows

---
 pandas/tests/indexing/test_copy_on_write.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/indexing/test_copy_on_write.py b/pandas/tests/indexing/test_copy_on_write.py
index 9524353818143..97ae0db23f519 100644
--- a/pandas/tests/indexing/test_copy_on_write.py
+++ b/pandas/tests/indexing/test_copy_on_write.py
@@ -296,7 +296,7 @@ def test_subset_set_column(using_array_manager):
     else:
         with pd.option_context("chained_assignment", "warn"):
             with tm.assert_produces_warning(com.SettingWithCopyWarning):
-                subset["a"] = np.array([10, 11])
+                subset["a"] = np.array([10, 11], dtype="int64")
 
     expected = pd.DataFrame(
         {"a": [10, 11], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3)

From 297662a378755431e5d0b0733ffc9f29dd5084e8 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 11 Nov 2021 20:56:37 +0100
Subject: [PATCH 17/21] [ArrayManager] Array version of putmask logic

---
 pandas/core/array_algos/putmask.py     | 97 +++++++++++++++++++++++++-
 pandas/core/internals/array_manager.py | 67 ++++++++++++++++--
 2 files changed, 156 insertions(+), 8 deletions(-)

diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py
index 77e38e6c6e3fc..6dce202e5ca7f 100644
--- a/pandas/core/array_algos/putmask.py
+++ b/pandas/core/array_algos/putmask.py
@@ -15,6 +15,7 @@
 )
 
 from pandas.core.dtypes.cast import (
+    can_hold_element,
     convert_scalar_for_putitemlike,
     find_common_type,
     infer_dtype_from,
@@ -22,11 +23,22 @@
 from pandas.core.dtypes.common import (
     is_float_dtype,
     is_integer_dtype,
+    is_interval_dtype,
     is_list_like,
 )
-from pandas.core.dtypes.missing import isna_compat
+from pandas.core.dtypes.generic import (
+    ABCDataFrame,
+    ABCIndex,
+    ABCSeries,
+)
+from pandas.core.dtypes.missing import (
+    is_valid_na_for_dtype,
+    isna_compat,
+    na_value_for_dtype,
+)
 
 from pandas.core.arrays import ExtensionArray
+from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
 
 
 def putmask_inplace(values: ArrayLike, mask: npt.NDArray[np.bool_], value: Any) -> None:
@@ -122,7 +134,7 @@ def putmask_smart(values: np.ndarray, mask: npt.NDArray[np.bool_], new) -> np.nd
                 nv[mask] = nn_at
                 return nv
 
-    new = np.asarray(new)
+    # new = np.asarray(new)
 
     if values.dtype.kind == new.dtype.kind:
         # preserves dtype if possible
@@ -225,3 +237,84 @@ def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
                 other = list(other)
 
     return other
+
+
+def putmask_flexible_ndarray(array: np.ndarray, mask, new):
+    """
+    Putmask implementation for ArrayManager putmask for ndarray.
+
+    Flexible version that will upcast if needed.
+    """
+    mask, noop = validate_putmask(array, mask)
+    assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))
+
+    # if we are passed a scalar None, convert it here
+    if not array.dtype == "object" and is_valid_na_for_dtype(new, array.dtype):
+        new = na_value_for_dtype(array.dtype, compat=False)
+
+    if can_hold_element(array, new):
+        # error: Argument 1 to "putmask_without_repeat" has incompatible type
+        # "Union[ndarray, ExtensionArray]"; expected "ndarray"
+        putmask_without_repeat(array, mask, new)  # type: ignore[arg-type]
+        return array
+
+    elif noop:
+        return array
+
+    dtype, _ = infer_dtype_from(new)
+    if dtype.kind in ["m", "M"]:
+        array = array.astype(object)
+        # convert to list to avoid numpy coercing datetimelikes to integers
+        new = setitem_datetimelike_compat(
+            array, mask.sum(), new  # type: ignore[arg-type]
+        )
+        # putmask_smart below converts it back to array
+        np.putmask(array, mask, new)
+        return array
+
+    new_values = putmask_smart(array, mask, new)
+    return new_values
+
+
+def _coerce_to_target_dtype(array, new):
+    dtype, _ = infer_dtype_from(new, pandas_dtype=True)
+    new_dtype = find_common_type([array.dtype, dtype])
+    return array.astype(new_dtype, copy=False)
+
+
+def putmask_flexible_ea(array: ExtensionArray, mask, new):
+    """
+    Putmask implementation for ArrayManager putmask for EA.
+
+    Flexible version that will upcast if needed.
+    """
+    mask = extract_bool_array(mask)
+
+    if isinstance(array, NDArrayBackedExtensionArray):
+
+        if not can_hold_element(array, new):
+            array = _coerce_to_target_dtype(array, new)
+            return putmask_flexible_ndarray(array, mask, new)
+
+        array.putmask(mask, new)
+        return array
+
+    if isinstance(new, (np.ndarray, ExtensionArray)) and len(new) == len(mask):
+        new = new[mask]
+
+    try:
+        array[mask] = new
+    except TypeError:
+        if not is_interval_dtype(array.dtype):
+            # Discussion about what we want to support in the general
+            #  case GH#39584
+            raise
+
+        array = _coerce_to_target_dtype(array, new)
+        if array.dtype == np.dtype("object"):
+            # For now at least, only support casting e.g.
+            #  Interval[int64]->Interval[float64],
+            raise
+        return putmask_flexible_ea(array, mask, new)
+
+    return array
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 6362ffa2851ff..f09fa76fddfd9 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -55,6 +55,10 @@
 )
 
 import pandas.core.algorithms as algos
+from pandas.core.array_algos.putmask import (
+    putmask_flexible_ea,
+    putmask_flexible_ndarray,
+)
 from pandas.core.array_algos.quantile import quantile_compat
 from pandas.core.array_algos.take import take_1d
 from pandas.core.arrays import (
@@ -378,13 +382,64 @@ def putmask(self, mask, new, align: bool = True):
         else:
             align_keys = ["mask"]
             new = extract_array(new, extract_numpy=True)
+        # if self._has_no_reference(loc):
+        # breakpoint()
+        
+        mask = np.asarray(mask)
+        for i in range(len(self.arrays)):
+            if not self._has_no_reference(i):
+                # if being referenced -> perform Copy-on-Write and clear the reference
+                self.arrays[i] = self.arrays[i].copy()
+                self._clear_reference(i)
+            self.arrays[i][mask[:, i]] = new
 
-        return self.apply_with_block(
-            "putmask",
-            align_keys=align_keys,
-            mask=mask,
-            new=new,
-        )
+        return self
+
+        # if align:
+        #     align_keys = ["new", "mask"]
+        # else:
+        #     align_keys = ["mask"]
+        #     new = extract_array(new, extract_numpy=True)
+
+
+    def putmask(self, mask, new, align: bool = True):
+        if align:
+            align_keys = ["new", "mask"]
+        else:
+            align_keys = ["mask"]
+            new = extract_array(new, extract_numpy=True)
+
+        kwargs = {"mask": mask, "new": new}
+        aligned_kwargs = {k: kwargs[k] for k in align_keys}
+
+        for i in range(len(self.arrays)):
+            
+            if not self._has_no_reference(i):
+                # if being referenced -> perform Copy-on-Write and clear the reference
+                self.arrays[i] = self.arrays[i].copy()
+                self._clear_reference(i)
+
+            for k, obj in aligned_kwargs.items():
+                if isinstance(obj, (ABCSeries, ABCDataFrame)):
+                    # The caller is responsible for ensuring that
+                    #  obj.axes[-1].equals(self.items)
+                    if obj.ndim == 1:
+                        kwargs[k] = obj._values
+                    else:
+                        kwargs[k] = obj.iloc[:, i]._values
+                else:
+                    # otherwise we have an ndarray
+                    if self.ndim == 2:
+                        kwargs[k] = obj[i]
+
+            arr = self.arrays[i]
+            if isinstance(arr, np.ndarray):
+                new = putmask_flexible_ndarray(arr, **kwargs)
+            else:
+                new = putmask_flexible_ea(arr, **kwargs)
+            self.arrays[i] = new
+
+        return self
 
     def diff(self: T, n: int, axis: int) -> T:
         if axis == 1:

From a011a0685f4d42611f38eb7e5eafaf320bc6c79c Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 11 Nov 2021 21:01:52 +0100
Subject: [PATCH 18/21] fixup copy-on-write in putmask

---
 pandas/core/indexing.py                     |  3 +-
 pandas/core/internals/array_manager.py      | 29 +---------
 pandas/tests/indexing/test_copy_on_write.py | 61 +++++++++++++++++++++
 3 files changed, 64 insertions(+), 29 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 35be629f1f22c..4a6c21a9aaced 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1840,7 +1840,8 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
         pi = plane_indexer
 
         if not hasattr(self.obj._mgr, "blocks"):
-            # ArrayManager
+            # ArrayManager: in this case we cannot rely on getting the column
+            # as a Series to mutate, but need to operated on the mgr directly
             if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)):
                 arr = self.obj._sanitize_column(value)
                 self.obj._mgr.iset(loc, arr)
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index f09fa76fddfd9..1e4f025b112a4 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -375,33 +375,6 @@ def where(self: T, other, cond, align: bool) -> T:
     # def setitem(self, indexer, value) -> ArrayManager:
     #     return self.apply_with_block("setitem", indexer=indexer, value=value)
 
-    def putmask(self, mask, new, align: bool = True):
-        # TODO(CoW) check references to copy if needed
-        if align:
-            align_keys = ["new", "mask"]
-        else:
-            align_keys = ["mask"]
-            new = extract_array(new, extract_numpy=True)
-        # if self._has_no_reference(loc):
-        # breakpoint()
-        
-        mask = np.asarray(mask)
-        for i in range(len(self.arrays)):
-            if not self._has_no_reference(i):
-                # if being referenced -> perform Copy-on-Write and clear the reference
-                self.arrays[i] = self.arrays[i].copy()
-                self._clear_reference(i)
-            self.arrays[i][mask[:, i]] = new
-
-        return self
-
-        # if align:
-        #     align_keys = ["new", "mask"]
-        # else:
-        #     align_keys = ["mask"]
-        #     new = extract_array(new, extract_numpy=True)
-
-
     def putmask(self, mask, new, align: bool = True):
         if align:
             align_keys = ["new", "mask"]
@@ -413,7 +386,7 @@ def putmask(self, mask, new, align: bool = True):
         aligned_kwargs = {k: kwargs[k] for k in align_keys}
 
         for i in range(len(self.arrays)):
-            
+
             if not self._has_no_reference(i):
                 # if being referenced -> perform Copy-on-Write and clear the reference
                 self.arrays[i] = self.arrays[i].copy()
diff --git a/pandas/tests/indexing/test_copy_on_write.py b/pandas/tests/indexing/test_copy_on_write.py
index 97ae0db23f519..53b05f4da0e7a 100644
--- a/pandas/tests/indexing/test_copy_on_write.py
+++ b/pandas/tests/indexing/test_copy_on_write.py
@@ -285,6 +285,35 @@ def test_subset_set_with_row_indexer(indexer_si, indexer, using_array_manager):
         tm.assert_frame_equal(df, df_orig)
 
 
+def test_subset_set_with_mask(using_array_manager):
+    # Case: setting values with a mask on a viewing subset: subset[mask] = value
+    df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]})
+    df_orig = df.copy()
+    subset = df[1:4]
+
+    mask = subset > 3
+
+    if using_array_manager:
+        subset[mask] = 0
+    else:
+        with pd.option_context("chained_assignment", "warn"):
+            with tm.assert_produces_warning(com.SettingWithCopyWarning):
+                subset[mask] = 0
+
+    expected = pd.DataFrame(
+        {"a": [2, 3, 0], "b": [0, 0, 0], "c": [0.20, 0.3, 0.4]}, index=range(1, 4)
+    )
+    tm.assert_frame_equal(subset, expected)
+    if using_array_manager:
+        # original parent dataframe is not modified (CoW)
+        tm.assert_frame_equal(df, df_orig)
+    else:
+        # original parent dataframe is actually updated
+        df_orig.loc[3, "a"] = 0
+        df_orig.loc[1:3, "b"] = 0
+        tm.assert_frame_equal(df, df_orig)
+
+
 def test_subset_set_column(using_array_manager):
     # Case: setting a single column on a viewing subset -> subset[col] = value
     df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
@@ -421,6 +450,38 @@ def test_series_getitem_slice(using_array_manager):
         assert s.iloc[0] == 0
 
 
+@pytest.mark.parametrize(
+    "indexer",
+    [slice(0, 2), np.array([True, True, False]), np.array([0, 1])],
+    ids=["slice", "mask", "array"],
+)
+def test_series_subset_set_with_indexer(indexer_si, indexer, using_array_manager):
+    # Case: setting values in a viewing Series with an indexer
+    s = pd.Series([1, 2, 3], index=["a", "b", "c"])
+    s_orig = s.copy()
+    subset = s[:]
+
+    # if (
+    #     indexer_si is tm.setitem
+    #     and isinstance(indexer, np.ndarray)
+    #     and indexer.dtype == "int"
+    # ):
+    #     pytest.skip("setitem with labels selects on columns")
+
+    expected = pd.Series([0, 0, 3], index=["a", "b", "c"])
+    indexer_si(subset)[indexer] = 0
+    tm.assert_series_equal(subset, expected)
+
+    if using_array_manager:
+        tm.assert_series_equal(s, s_orig)
+    else:
+        tm.assert_series_equal(s, expected)
+
+    expected = pd.DataFrame(
+        {"a": [0, 0, 4], "b": [0, 0, 7], "c": [0.0, 0.0, 0.4]}, index=range(1, 4)
+    )
+
+
 # -----------------------------------------------------------------------------
 # del operator
 

From cc090019ebf5b65c25d548a2ef28a764e187c971 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 11 Nov 2021 21:08:17 +0100
Subject: [PATCH 19/21] Update BlockManager expected results after recent
 behaviour changes

---
 pandas/tests/indexing/test_copy_on_write.py | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/pandas/tests/indexing/test_copy_on_write.py b/pandas/tests/indexing/test_copy_on_write.py
index 53b05f4da0e7a..ce5c6e007a3d5 100644
--- a/pandas/tests/indexing/test_copy_on_write.py
+++ b/pandas/tests/indexing/test_copy_on_write.py
@@ -331,11 +331,7 @@ def test_subset_set_column(using_array_manager):
         {"a": [10, 11], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3)
     )
     tm.assert_frame_equal(subset, expected)
-    if using_array_manager:
-        tm.assert_frame_equal(df, df_orig)
-    else:
-        df_orig.loc[1:2, "a"] = np.array([10, 11])
-        tm.assert_frame_equal(df, df_orig)
+    tm.assert_frame_equal(df, df_orig)
 
 
 def test_subset_set_columns_single_block(using_array_manager):
@@ -354,11 +350,7 @@ def test_subset_set_columns_single_block(using_array_manager):
 
     expected = pd.DataFrame({"a": [0, 0], "b": [5, 6], "c": [0, 0]}, index=range(1, 3))
     tm.assert_frame_equal(subset, expected)
-    if using_array_manager:
-        tm.assert_frame_equal(df, df_orig)
-    else:
-        df_orig.loc[1:2, ["a", "c"]] = 0
-        tm.assert_frame_equal(df, df_orig)
+    tm.assert_frame_equal(df, df_orig)
 
 
 def test_subset_set_columns_mixed_block(using_array_manager):
@@ -377,13 +369,7 @@ def test_subset_set_columns_mixed_block(using_array_manager):
 
     expected = pd.DataFrame({"a": [0, 0], "b": [5, 6], "c": [0, 0]}, index=range(1, 3))
     tm.assert_frame_equal(subset, expected)
-    if using_array_manager:
-        tm.assert_frame_equal(df, df_orig)
-    else:
-        # In the mixed case with BlockManager, only one of the two columns is
-        # mutated in the parent frame ..
-        df_orig.loc[1:2, ["a"]] = 0
-        tm.assert_frame_equal(df, df_orig)
+    tm.assert_frame_equal(df, df_orig)
 
 
 @pytest.mark.parametrize(

From 37e7ce0b108a56047b937605750bb68861fd73f4 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 22 Nov 2021 11:29:47 +0100
Subject: [PATCH 20/21] limit changes to putmask for this PR

---
 pandas/core/array_algos/putmask.py     | 134 ++-----------------------
 pandas/core/internals/array_manager.py |  35 ++-----
 2 files changed, 16 insertions(+), 153 deletions(-)

diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py
index 6dce202e5ca7f..1f37e0e5d249a 100644
--- a/pandas/core/array_algos/putmask.py
+++ b/pandas/core/array_algos/putmask.py
@@ -4,7 +4,6 @@
 from __future__ import annotations
 
 from typing import Any
-import warnings
 
 import numpy as np
 
@@ -20,25 +19,9 @@
     find_common_type,
     infer_dtype_from,
 )
-from pandas.core.dtypes.common import (
-    is_float_dtype,
-    is_integer_dtype,
-    is_interval_dtype,
-    is_list_like,
-)
-from pandas.core.dtypes.generic import (
-    ABCDataFrame,
-    ABCIndex,
-    ABCSeries,
-)
-from pandas.core.dtypes.missing import (
-    is_valid_na_for_dtype,
-    isna_compat,
-    na_value_for_dtype,
-)
+from pandas.core.dtypes.common import is_list_like
 
 from pandas.core.arrays import ExtensionArray
-from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
 
 
 def putmask_inplace(values: ArrayLike, mask: npt.NDArray[np.bool_], value: Any) -> None:
@@ -87,7 +70,7 @@ def putmask_smart(values: np.ndarray, mask: npt.NDArray[np.bool_], new) -> np.nd
         `values`, updated in-place.
     mask : np.ndarray[bool]
         Applies to both sides (array like).
-    new : `new values` either scalar or an array like aligned with `values`
+    new : listlike `new values` aligned with `values`
 
     Returns
     -------
@@ -101,9 +84,6 @@ def putmask_smart(values: np.ndarray, mask: npt.NDArray[np.bool_], new) -> np.nd
     # we cannot use np.asarray() here as we cannot have conversions
     # that numpy does when numeric are mixed with strings
 
-    if not is_list_like(new):
-        new = np.broadcast_to(new, mask.shape)
-
     # see if we are only masking values that if putted
     # will work in the current dtype
     try:
@@ -112,29 +92,14 @@ def putmask_smart(values: np.ndarray, mask: npt.NDArray[np.bool_], new) -> np.nd
         # TypeError: only integer scalar arrays can be converted to a scalar index
         pass
     else:
-        # make sure that we have a nullable type if we have nulls
-        if not isna_compat(values, nn[0]):
-            pass
-        elif not (is_float_dtype(nn.dtype) or is_integer_dtype(nn.dtype)):
-            # only compare integers/floats
-            pass
-        elif not (is_float_dtype(values.dtype) or is_integer_dtype(values.dtype)):
-            # only compare integers/floats
-            pass
-        else:
-
-            # we ignore ComplexWarning here
-            with warnings.catch_warnings(record=True):
-                warnings.simplefilter("ignore", np.ComplexWarning)
-                nn_at = nn.astype(values.dtype)
+        # We only get to putmask_smart when we cannot hold 'new' in values.
+        #  The "smart" part of putmask_smart is checking if we can hold new[mask]
+        #  in values, in which case we can still avoid the need to cast.
+        if can_hold_element(values, nn):
+            values[mask] = nn
+            return values
 
-            comp = nn == nn_at
-            if is_list_like(comp) and comp.all():
-                nv = values.copy()
-                nv[mask] = nn_at
-                return nv
-
-    # new = np.asarray(new)
+    new = np.asarray(new)
 
     if values.dtype.kind == new.dtype.kind:
         # preserves dtype if possible
@@ -237,84 +202,3 @@ def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
                 other = list(other)
 
     return other
-
-
-def putmask_flexible_ndarray(array: np.ndarray, mask, new):
-    """
-    Putmask implementation for ArrayManager putmask for ndarray.
-
-    Flexible version that will upcast if needed.
-    """
-    mask, noop = validate_putmask(array, mask)
-    assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))
-
-    # if we are passed a scalar None, convert it here
-    if not array.dtype == "object" and is_valid_na_for_dtype(new, array.dtype):
-        new = na_value_for_dtype(array.dtype, compat=False)
-
-    if can_hold_element(array, new):
-        # error: Argument 1 to "putmask_without_repeat" has incompatible type
-        # "Union[ndarray, ExtensionArray]"; expected "ndarray"
-        putmask_without_repeat(array, mask, new)  # type: ignore[arg-type]
-        return array
-
-    elif noop:
-        return array
-
-    dtype, _ = infer_dtype_from(new)
-    if dtype.kind in ["m", "M"]:
-        array = array.astype(object)
-        # convert to list to avoid numpy coercing datetimelikes to integers
-        new = setitem_datetimelike_compat(
-            array, mask.sum(), new  # type: ignore[arg-type]
-        )
-        # putmask_smart below converts it back to array
-        np.putmask(array, mask, new)
-        return array
-
-    new_values = putmask_smart(array, mask, new)
-    return new_values
-
-
-def _coerce_to_target_dtype(array, new):
-    dtype, _ = infer_dtype_from(new, pandas_dtype=True)
-    new_dtype = find_common_type([array.dtype, dtype])
-    return array.astype(new_dtype, copy=False)
-
-
-def putmask_flexible_ea(array: ExtensionArray, mask, new):
-    """
-    Putmask implementation for ArrayManager putmask for EA.
-
-    Flexible version that will upcast if needed.
-    """
-    mask = extract_bool_array(mask)
-
-    if isinstance(array, NDArrayBackedExtensionArray):
-
-        if not can_hold_element(array, new):
-            array = _coerce_to_target_dtype(array, new)
-            return putmask_flexible_ndarray(array, mask, new)
-
-        array.putmask(mask, new)
-        return array
-
-    if isinstance(new, (np.ndarray, ExtensionArray)) and len(new) == len(mask):
-        new = new[mask]
-
-    try:
-        array[mask] = new
-    except TypeError:
-        if not is_interval_dtype(array.dtype):
-            # Discussion about what we want to support in the general
-            #  case GH#39584
-            raise
-
-        array = _coerce_to_target_dtype(array, new)
-        if array.dtype == np.dtype("object"):
-            # For now at least, only support casting e.g.
-            #  Interval[int64]->Interval[float64],
-            raise
-        return putmask_flexible_ea(array, mask, new)
-
-    return array
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 1e4f025b112a4..de03c04050de0 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -55,10 +55,6 @@
 )
 
 import pandas.core.algorithms as algos
-from pandas.core.array_algos.putmask import (
-    putmask_flexible_ea,
-    putmask_flexible_ndarray,
-)
 from pandas.core.array_algos.quantile import quantile_compat
 from pandas.core.array_algos.take import take_1d
 from pandas.core.arrays import (
@@ -382,36 +378,19 @@ def putmask(self, mask, new, align: bool = True):
             align_keys = ["mask"]
             new = extract_array(new, extract_numpy=True)
 
-        kwargs = {"mask": mask, "new": new}
-        aligned_kwargs = {k: kwargs[k] for k in align_keys}
-
         for i in range(len(self.arrays)):
-
             if not self._has_no_reference(i):
                 # if being referenced -> perform Copy-on-Write and clear the reference
                 self.arrays[i] = self.arrays[i].copy()
                 self._clear_reference(i)
 
-            for k, obj in aligned_kwargs.items():
-                if isinstance(obj, (ABCSeries, ABCDataFrame)):
-                    # The caller is responsible for ensuring that
-                    #  obj.axes[-1].equals(self.items)
-                    if obj.ndim == 1:
-                        kwargs[k] = obj._values
-                    else:
-                        kwargs[k] = obj.iloc[:, i]._values
-                else:
-                    # otherwise we have an ndarray
-                    if self.ndim == 2:
-                        kwargs[k] = obj[i]
-
-            arr = self.arrays[i]
-            if isinstance(arr, np.ndarray):
-                new = putmask_flexible_ndarray(arr, **kwargs)
-            else:
-                new = putmask_flexible_ea(arr, **kwargs)
-            self.arrays[i] = new
-
+        new_mgr = self.apply_with_block(
+            "putmask",
+            align_keys=align_keys,
+            mask=mask,
+            new=new,
+        )
+        self.arrays = new_mgr.arrays
         return self
 
     def diff(self: T, n: int, axis: int) -> T:

From 0f28095bb6011fb3d9f7ce8232428b87fe08b832 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 26 Nov 2021 14:09:12 +0100
Subject: [PATCH 21/21] address feedback

---
 pandas/core/internals/array_manager.py        | 33 +++++++------------
 pandas/tests/frame/indexing/test_xs.py        |  3 +-
 pandas/tests/frame/methods/test_cov_corr.py   |  2 --
 .../indexing/test_chaining_and_caching.py     |  1 -
 pandas/tests/indexing/test_scalar.py          | 10 +-----
 5 files changed, 14 insertions(+), 35 deletions(-)

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 52717e1fab39d..47abcf68243a2 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -171,7 +171,7 @@ def set_axis(self, axis: int, new_labels: Index) -> None:
         axis = self._normalize_axis(axis)
         self._axes[axis] = new_labels
 
-    def _has_no_reference(self, i: int):
+    def _has_no_reference(self, i: int) -> bool:
         """
         Check for column `i` if has references.
         (whether it references another array or is itself being referenced)
@@ -182,7 +182,7 @@ def _has_no_reference(self, i: int):
             self.arrays[i]
         ) == 0
 
-    def _clear_reference(self, i: int):
+    def _clear_reference(self, i: int) -> None:
         """
         Clear any reference for column `i`.
         """
@@ -539,8 +539,8 @@ def copy(self: T, deep=True) -> T:
 
         Parameters
         ----------
-        deep : bool or string, default True
-            If False, return shallow copy (do not copy data)
+        deep : bool, string or None, default True
+            If False or None, return a shallow copy (do not copy data).
             If 'all', copy data and a deep copy of the index
 
         Returns
@@ -567,14 +567,9 @@ def copy_func(ax):
             refs = None
         else:
             new_arrays = list(self.arrays)
-            refs = [weakref.ref(arr) for arr in self.arrays]
+            refs: list[weakref.ref | None] = [weakref.ref(arr) for arr in self.arrays]
 
-        # error: Argument 3 to "BaseArrayManager" has incompatible type
-        # "Optional[List[ReferenceType[Union[ndarray[Any, Any], ExtensionArray]]]]";
-        # expected "Optional[List[Optional[ReferenceType[Any]]]]" [arg-type]
-        return type(self)(
-            new_arrays, new_axes, refs, verify_integrity=False  # type: ignore[arg-type]
-        )
+        return type(self)(new_arrays, new_axes, refs, verify_integrity=False)
 
     def reindex_indexer(
         self: T,
@@ -619,8 +614,8 @@ def _reindex_indexer(
         axis : int
         fill_value : object, default None
         allow_dups : bool, default False
-        copy : bool, default True
-
+        copy : bool or None, default True
+            If None, regard as False to get shallow copy.
 
         pandas-indexer with -1's only.
         """
@@ -792,7 +787,7 @@ def _verify_integrity(self) -> None:
                     f"{arr.ndim} dimensions instead."
                 )
         if self.refs is not None:
-            if not len(self.refs) == n_columns:
+            if len(self.refs) != n_columns:
                 raise ValueError(
                     "Number of passed refs must equal the size of the column Index: "
                     f"{len(self.refs)} refs vs {n_columns} columns."
@@ -830,10 +825,11 @@ def fast_xs(self, loc: int) -> ArrayLike:
     def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager:
         axis = self._normalize_axis(axis)
 
+        refs: list[weakref.ref | None]
         if axis == 0:
             arrays = [arr[slobj] for arr in self.arrays]
             # slicing results in views -> track references to original arrays
-            # TODO possible to optimizate this with single ref to the full ArrayManager?
+            # TODO possible to optimize this with single ref to the full ArrayManager?
             refs = [weakref.ref(arr) for arr in self.arrays]
         elif axis == 1:
             arrays = self.arrays[slobj]
@@ -843,12 +839,7 @@ def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager:
         new_axes = list(self._axes)
         new_axes[axis] = new_axes[axis]._getitem_slice(slobj)
 
-        # error: Argument 3 to "ArrayManager" has incompatible type
-        # "List[ReferenceType[Union[ndarray[Any, Any], ExtensionArray]]]";
-        # expected "Optional[List[Optional[ReferenceType[Any]]]]" [arg-type]
-        return type(self)(
-            arrays, new_axes, refs, verify_integrity=False  # type: ignore[arg-type]
-        )
+        return type(self)(arrays, new_axes, refs, verify_integrity=False)
 
     def iget(self, i: int) -> SingleArrayManager:
         """
diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py
index 93b6a8c8b04ba..3a1f9d8a527d8 100644
--- a/pandas/tests/frame/indexing/test_xs.py
+++ b/pandas/tests/frame/indexing/test_xs.py
@@ -116,13 +116,12 @@ def test_xs_view(self, using_array_manager):
 
         dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5))
 
+        dm.xs(2)[:] = 20
         if using_array_manager:
             # INFO(ArrayManager) with ArrayManager getting a row as a view is
             # not possible
-            dm.xs(2)[:] = 20
             assert not (dm.xs(2) == 20).any()
         else:
-            dm.xs(2)[:] = 20
             assert (dm.xs(2) == 20).all()
 
 
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index 64d5307eeb6f2..b551cfc500e7a 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -27,9 +27,7 @@ def test_cov(self, float_frame, float_string_frame):
 
         # with NAs
         frame = float_frame.copy()
-        # frame["A"][:5] = np.nan
         frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
-        # frame["B"][5:10] = np.nan
         frame.iloc[5:10, frame.columns.get_loc("B")] = np.nan
         result = frame.cov(min_periods=len(frame) - 8)
         expected = frame.cov()
diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py
index 8eba27a907280..f3c06e04c81f6 100644
--- a/pandas/tests/indexing/test_chaining_and_caching.py
+++ b/pandas/tests/indexing/test_chaining_and_caching.py
@@ -419,7 +419,6 @@ def test_detect_chained_assignment_changing_dtype(self, using_array_manager):
             with pytest.raises(com.SettingWithCopyError, match=msg):
                 df["C"][2] = "foo"
         else:
-            # breakpoint()
             # TODO(CoW) can we still warn here?
             # df.loc[2]["D"] = "foo"
             # df.loc[2]["C"] = "foo"
diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py
index c4fc604bd8f3c..bcb76fb078e74 100644
--- a/pandas/tests/indexing/test_scalar.py
+++ b/pandas/tests/indexing/test_scalar.py
@@ -7,8 +7,6 @@
 import numpy as np
 import pytest
 
-import pandas.util._test_decorators as td
-
 from pandas import (
     DataFrame,
     Series,
@@ -48,11 +46,7 @@ def _check(f, func, values=False):
         for f in [d["ints"], d["uints"], d["labels"], d["ts"], d["floats"]]:
             _check(f, "at")
 
-    # TODO(CoW) fix at/iat setting on a DataFrame
-    @pytest.mark.parametrize(
-        "kind",
-        ["series", pytest.param("frame", marks=td.skip_array_manager_invalid_test)],
-    )
+    @pytest.mark.parametrize("kind", ["series", "frame"])
     def test_at_and_iat_set(self, kind):
         def _check(f, func, values=False):
 
@@ -220,8 +214,6 @@ def test_mixed_index_at_iat_loc_iloc_dataframe(self):
         with pytest.raises(KeyError, match="^3$"):
             df.loc[0, 3]
 
-    # TODO(CoW) fix at/iat setting on a DataFrame
-    @td.skip_array_manager_invalid_test
     def test_iat_setter_incompatible_assignment(self):
         # GH 23236
         result = DataFrame({"a": [0, 1], "b": [4, 5]})