From e977432f5dda08d5eaf0890161cd0304c4eac209 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 2 Sep 2020 22:00:00 -0500 Subject: [PATCH] Optionally disallow duplicate labels (#28394) --- doc/source/reference/frame.rst | 16 + .../reference/general_utility_functions.rst | 1 + doc/source/reference/series.rst | 15 + doc/source/user_guide/duplicates.rst | 210 ++++++++ doc/source/user_guide/index.rst | 1 + doc/source/whatsnew/v1.2.0.rst | 49 ++ pandas/__init__.py | 1 + pandas/_testing.py | 15 + pandas/core/api.py | 1 + pandas/core/flags.py | 113 +++++ pandas/core/frame.py | 11 +- pandas/core/generic.py | 130 ++++- pandas/core/indexes/base.py | 48 +- pandas/core/series.py | 10 +- pandas/errors/__init__.py | 21 + pandas/tests/api/test_api.py | 1 + pandas/tests/base/test_misc.py | 3 +- pandas/tests/frame/test_api.py | 27 ++ pandas/tests/generic/test_duplicate_labels.py | 450 ++++++++++++++++++ pandas/tests/generic/test_generic.py | 10 + pandas/tests/series/test_api.py | 26 + pandas/tests/test_flags.py | 48 ++ pandas/tests/util/test_assert_frame_equal.py | 15 + pandas/tests/util/test_assert_series_equal.py | 15 + 24 files changed, 1227 insertions(+), 10 deletions(-) create mode 100644 doc/source/user_guide/duplicates.rst create mode 100644 pandas/core/flags.py create mode 100644 pandas/tests/generic/test_duplicate_labels.py create mode 100644 pandas/tests/test_flags.py diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 4d9d18e3d204e..9a1ebc8d670dc 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -37,6 +37,7 @@ Attributes and underlying data DataFrame.shape DataFrame.memory_usage DataFrame.empty + DataFrame.set_flags Conversion ~~~~~~~~~~ @@ -276,6 +277,21 @@ Time Series-related DataFrame.tz_convert DataFrame.tz_localize +.. _api.frame.flags: + +Flags +~~~~~ + +Flags refer to attributes of the pandas object. Properties of the dataset (like +the date it was recorded, the URL it was accessed from, etc.) should be stored +in :attr:`DataFrame.attrs`. + +.. autosummary:: + :toctree: api/ + + Flags + + .. _api.frame.metadata: Metadata diff --git a/doc/source/reference/general_utility_functions.rst b/doc/source/reference/general_utility_functions.rst index c1759110b94ad..3cba0a81a7011 100644 --- a/doc/source/reference/general_utility_functions.rst +++ b/doc/source/reference/general_utility_functions.rst @@ -37,6 +37,7 @@ Exceptions and warnings errors.AccessorRegistrationWarning errors.DtypeWarning + errors.DuplicateLabelError errors.EmptyDataError errors.InvalidIndexError errors.MergeError diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index ae3e121ca8212..5131d35334693 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -39,6 +39,8 @@ Attributes Series.empty Series.dtypes Series.name + Series.flags + Series.set_flags Conversion ---------- @@ -527,6 +529,19 @@ Sparse-dtype specific methods and attributes are provided under the Series.sparse.from_coo Series.sparse.to_coo +.. _api.series.flags: + +Flags +~~~~~ + +Flags refer to attributes of the pandas object. Properties of the dataset (like +the date it was recorded, the URL it was accessed from, etc.) should be stored +in :attr:`Series.attrs`. + +.. autosummary:: + :toctree: api/ + + Flags ..
_api.series.metadata: diff --git a/doc/source/user_guide/duplicates.rst b/doc/source/user_guide/duplicates.rst new file mode 100644 index 0000000000000..b65822fab2b23 --- /dev/null +++ b/doc/source/user_guide/duplicates.rst @@ -0,0 +1,210 @@ +.. _duplicates: + +**************** +Duplicate Labels +**************** + +:class:`Index` objects are not required to be unique; you can have duplicate row +or column labels. This may be a bit confusing at first. If you're familiar with +SQL, you know that row labels are similar to a primary key on a table, and you +would never want duplicates in a SQL table. But one of pandas' roles is to clean +messy, real-world data before it goes to some downstream system. And real-world +data has duplicates, even in fields that are supposed to be unique. + +This section describes how duplicate labels change the behavior of certain +operations, how to prevent duplicates from arising during operations, and how to +detect them if they do. + +.. ipython:: python + + import pandas as pd + import numpy as np + +Consequences of Duplicate Labels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some pandas methods (:meth:`Series.reindex` for example) just don't work with +duplicates present. The output can't be determined, and so pandas raises. + +.. ipython:: python + :okexcept: + + s1 = pd.Series([0, 1, 2], index=['a', 'b', 'b']) + s1.reindex(['a', 'b', 'c']) + +Other methods, like indexing, can give very surprising results. Typically +indexing with a scalar will *reduce dimensionality*. Slicing a ``DataFrame`` +with a scalar will return a ``Series``. Slicing a ``Series`` with a scalar will +return a scalar. But with duplicates, this isn't the case. + +.. ipython:: python + + df1 = pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns=['A', 'A', 'B']) + df1 + +We have duplicates in the columns. If we slice ``'B'``, we get back a ``Series`` + +.. ipython:: python + + df1['B'] # a series + +But slicing ``'A'`` returns a ``DataFrame`` + + +.. ipython:: python + + df1['A'] # a DataFrame + +This applies to row labels as well + +.. ipython:: python + + df2 = pd.DataFrame({"A": [0, 1, 2]}, index=['a', 'a', 'b']) + df2 + df2.loc['b', 'A'] # a scalar + df2.loc['a', 'A'] # a Series + +Duplicate Label Detection +~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can check whether an :class:`Index` (storing the row or column labels) is +unique with :attr:`Index.is_unique`: + +.. ipython:: python + + df2 + df2.index.is_unique + df2.columns.is_unique + +.. note:: + + Checking whether an index is unique is somewhat expensive for large datasets. + Pandas does cache this result, so re-checking on the same index is very fast. + +:meth:`Index.duplicated` will return a boolean ndarray indicating whether a +label is repeated. + +.. ipython:: python + + df2.index.duplicated() + +Which can be used as a boolean filter to drop duplicate rows. + +.. ipython:: python + + df2.loc[~df2.index.duplicated(), :] + +If you need additional logic to handle duplicate labels, rather than just +dropping the repeats, using :meth:`~DataFrame.groupby` on the index is a common +trick. For example, we'll resolve duplicates by taking the average of all rows +with the same label. + +.. ipython:: python + + df2.groupby(level=0).mean() + +.. _duplicates.disallow: + +Disallowing Duplicate Labels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.2.0 + +As noted above, handling duplicates is an important feature when reading in raw +data.
That said, you may want to avoid introducing duplicates as part of a data +processing pipeline (from methods like :meth:`pandas.concat`, +:meth:`~DataFrame.rename`, etc.). Both :class:`Series` and :class:`DataFrame` +can *disallow* duplicate labels by calling ``.set_flags(allows_duplicate_labels=False)`` +(the default is to allow them). If there are duplicate labels, an exception +will be raised. + +.. ipython:: python + :okexcept: + + pd.Series( + [0, 1, 2], + index=['a', 'b', 'b'] + ).set_flags(allows_duplicate_labels=False) + +This applies to both row and column labels for a :class:`DataFrame` + +.. ipython:: python + :okexcept: + + pd.DataFrame( + [[0, 1, 2], [3, 4, 5]], columns=["A", "B", "B"], + ).set_flags(allows_duplicate_labels=False) + +This attribute can be checked or set with :attr:`~DataFrame.flags.allows_duplicate_labels`, +which indicates whether that object can have duplicate labels. + +.. ipython:: python + + df = ( + pd.DataFrame({"A": [0, 1, 2, 3]}, + index=['x', 'y', 'X', 'Y']) + .set_flags(allows_duplicate_labels=False) + ) + df + df.flags.allows_duplicate_labels + +:meth:`DataFrame.set_flags` can be used to return a new ``DataFrame`` with attributes +like ``allows_duplicate_labels`` set to some value + +.. ipython:: python + + df2 = df.set_flags(allows_duplicate_labels=True) + df2.flags.allows_duplicate_labels + +The new ``DataFrame`` returned is a view on the same data as the old ``DataFrame``. +Alternatively, the property can be set directly on the same object + + +.. ipython:: python + + df2.flags.allows_duplicate_labels = False + df2.flags.allows_duplicate_labels + +When processing raw, messy data you might initially read in the messy data +(which potentially has duplicate labels), deduplicate, and then disallow duplicates +going forward, to ensure that your data pipeline doesn't introduce duplicates. + + +.. code-block:: python + + >>> raw = pd.read_csv("...") + >>> deduplicated = raw.groupby(level=0).first() # remove duplicates + >>> deduplicated.flags.allows_duplicate_labels = False # disallow going forward + +Setting ``allows_duplicate_labels=False`` on a ``Series`` or ``DataFrame`` with duplicate +labels or performing an operation that introduces duplicate labels on a ``Series`` or +``DataFrame`` that disallows duplicates will raise an +:class:`errors.DuplicateLabelError`. + +.. ipython:: python + :okexcept: + + df.rename(str.upper) + +This error message contains the labels that are duplicated, and the numeric positions +of all the duplicates (including the "original") in the ``Series`` or ``DataFrame``. + +Duplicate Label Propagation +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In general, disallowing duplicates is "sticky". It's preserved through +operations. + +.. ipython:: python + :okexcept: + + s1 = pd.Series(0, index=['a', 'b']).set_flags(allows_duplicate_labels=False) + s1 + s1.head().rename({"a": "b"}) + +.. warning:: + + This is an experimental feature. Currently, many methods fail to + propagate the ``allows_duplicate_labels`` value. In future versions + it is expected that every method taking or returning one or more + DataFrame or Series objects will propagate ``allows_duplicate_labels``.
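+If an operation on such an object does introduce duplicates, the resulting
+:class:`errors.DuplicateLabelError` (a subclass of ``ValueError``) can be caught and
+handled with an explicit deduplication step. The sketch below is illustrative only:
+``combine`` is a hypothetical pipeline helper, not part of pandas.
+
+.. code-block:: python
+
+   def combine(frames):
+       # Concatenate raw inputs, then opt in to the duplicate-label check.
+       combined = pd.concat(frames)
+       try:
+           return combined.set_flags(allows_duplicate_labels=False)
+       except pd.errors.DuplicateLabelError:
+           # Fall back to keeping the first occurrence of each label before
+           # disallowing duplicates again.
+           deduplicated = combined.loc[~combined.index.duplicated(keep="first")]
+           return deduplicated.set_flags(allows_duplicate_labels=False)
+
+Keeping the fallback explicit makes the deduplication policy (here, keeping the
+first occurrence of each label) a visible part of the pipeline rather than an
+accident of duplicates being allowed.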
diff --git a/doc/source/user_guide/index.rst b/doc/source/user_guide/index.rst index 8226e72779588..2fc9e066e6712 100644 --- a/doc/source/user_guide/index.rst +++ b/doc/source/user_guide/index.rst @@ -33,6 +33,7 @@ Further information on any specific method can be obtained in the reshaping text missing_data + duplicates categorical integer_na boolean diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 9c8ee10a8a0af..7c083b95b21f3 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -13,6 +13,53 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ +.. _whatsnew_120.duplicate_labels: + +Optionally disallow duplicate labels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Series` and :class:`DataFrame` now support an ``allows_duplicate_labels`` flag, set via +:meth:`~DataFrame.set_flags`, to control whether the index or columns can contain duplicate labels +(:issue:`28394`). This can be used to prevent accidental introduction of duplicate labels, which can +affect downstream operations. + +By default, duplicates continue to be allowed + +.. ipython:: python + + pd.Series([1, 2], index=['a', 'a']) + +.. ipython:: python + :okexcept: + + pd.Series([1, 2], index=['a', 'a']).set_flags(allows_duplicate_labels=False) + +Pandas will propagate the ``allows_duplicate_labels`` property through many operations. + +.. ipython:: python + :okexcept: + + a = ( + pd.Series([1, 2], index=['a', 'b']) + .set_flags(allows_duplicate_labels=False) + ) + a + # An operation introducing duplicates + a.reindex(['a', 'b', 'a']) + +.. warning:: + + This is an experimental feature. Currently, many methods fail to + propagate the ``allows_duplicate_labels`` value. In future versions + it is expected that every method taking or returning one or more + DataFrame or Series objects will propagate ``allows_duplicate_labels``. + +See :ref:`duplicates` for more. + +The ``allows_duplicate_labels`` flag is stored in the new :attr:`DataFrame.flags` +attribute. This stores global attributes that apply to the *pandas object*. This +differs from :attr:`DataFrame.attrs`, which stores information that applies to +the dataset. + Passing arguments to fsspec backends ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -53,6 +100,8 @@ For example: Other enhancements ^^^^^^^^^^^^^^^^^^ + +- Added :meth:`~DataFrame.set_flags` for setting table-wide flags on a ``Series`` or ``DataFrame`` (:issue:`28394`) - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) - - diff --git a/pandas/__init__.py b/pandas/__init__.py index 36576da74c75d..2737bcd8f9ccf 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -100,6 +100,7 @@ to_datetime, to_timedelta, # misc + Flags, Grouper, factorize, unique, diff --git a/pandas/_testing.py b/pandas/_testing.py index b402b040d9268..04d36749a3d8c 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1225,6 +1225,7 @@ def assert_series_equal( check_categorical=True, check_category_order=True, check_freq=True, + check_flags=True, rtol=1.0e-5, atol=1.0e-8, obj="Series", @@ -1271,6 +1272,11 @@ def assert_series_equal( .. versionadded:: 1.0.2 check_freq : bool, default True Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex. + check_flags : bool, default True + Whether to check the `flags` attribute. + + .. versionadded:: 1.2.0 + rtol : float, default 1e-5 Relative tolerance. Only used when check_exact is False.
@@ -1307,6 +1313,9 @@ def assert_series_equal( msg2 = f"{len(right)}, {right.index}" raise_assert_detail(obj, "Series length are different", msg1, msg2) + if check_flags: + assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}" + # index comparison assert_index_equal( left.index, @@ -1429,6 +1438,7 @@ def assert_frame_equal( check_categorical=True, check_like=False, check_freq=True, + check_flags=True, rtol=1.0e-5, atol=1.0e-8, obj="DataFrame", @@ -1490,6 +1500,8 @@ def assert_frame_equal( (same as in columns) - same labels must be with the same data. check_freq : bool, default True Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex. + check_flags : bool, default True + Whether to check the `flags` attribute. rtol : float, default 1e-5 Relative tolerance. Only used when check_exact is False. @@ -1563,6 +1575,9 @@ def assert_frame_equal( if check_like: left, right = left.reindex_like(right), right + if check_flags: + assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}" + # index comparison assert_index_equal( left.index, diff --git a/pandas/core/api.py b/pandas/core/api.py index b0b65f9d0be34..348e9206d6e19 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -26,6 +26,7 @@ ) from pandas.core.arrays.string_ import StringDtype from pandas.core.construction import array +from pandas.core.flags import Flags from pandas.core.groupby import Grouper, NamedAgg from pandas.core.indexes.api import ( CategoricalIndex, diff --git a/pandas/core/flags.py b/pandas/core/flags.py new file mode 100644 index 0000000000000..15966d8ddce2a --- /dev/null +++ b/pandas/core/flags.py @@ -0,0 +1,113 @@ +import weakref + + +class Flags: + """ + Flags that apply to pandas objects. + + .. versionadded:: 1.2.0 + + Parameters + ---------- + obj : Series or DataFrame + The object these flags are associated with. + allows_duplicate_labels : bool, default True + Whether to allow duplicate labels in this object. By default, + duplicate labels are permitted. Setting this to ``False`` will + cause an :class:`errors.DuplicateLabelError` to be raised when + `index` (or columns for DataFrame) is not unique, or any + subsequent operation introduces duplicates. + See :ref:`duplicates.disallow` for more. + + .. warning:: + + This is an experimental feature. Currently, many methods fail to + propagate the ``allows_duplicate_labels`` value. In future versions + it is expected that every method taking or returning one or more + DataFrame or Series objects will propagate ``allows_duplicate_labels``. + + Notes + ----- + Attributes can be set in two ways + + >>> df = pd.DataFrame() + >>> df.flags + <Flags(allows_duplicate_labels=True)> + >>> df.flags.allows_duplicate_labels = False + >>> df.flags + <Flags(allows_duplicate_labels=False)> + + >>> df.flags['allows_duplicate_labels'] = True + >>> df.flags + <Flags(allows_duplicate_labels=True)> + """ + + _keys = {"allows_duplicate_labels"} + + def __init__(self, obj, *, allows_duplicate_labels): + self._allows_duplicate_labels = allows_duplicate_labels + self._obj = weakref.ref(obj) + + @property + def allows_duplicate_labels(self) -> bool: + """ + Whether this object allows duplicate labels. + + Setting ``allows_duplicate_labels=False`` ensures that the + index (and columns of a DataFrame) are unique. Most methods + that accept and return a Series or DataFrame will propagate + the value of ``allows_duplicate_labels``. + + See :ref:`duplicates` for more. + + See Also + -------- + DataFrame.attrs : Set global metadata on this object. + DataFrame.set_flags : Set global flags on this object.
+ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2]}, index=['a', 'a']) + >>> df.flags.allows_duplicate_labels + True + >>> df.flags.allows_duplicate_labels = False + Traceback (most recent call last): + ... + pandas.errors.DuplicateLabelError: Index has duplicates. + positions + label + a [0, 1] + """ + return self._allows_duplicate_labels + + @allows_duplicate_labels.setter + def allows_duplicate_labels(self, value: bool): + value = bool(value) + obj = self._obj() + if obj is None: + raise ValueError("This flag's object has been deleted.") + + if not value: + for ax in obj.axes: + ax._maybe_check_unique() + + self._allows_duplicate_labels = value + + def __getitem__(self, key): + if key not in self._keys: + raise KeyError(key) + + return getattr(self, key) + + def __setitem__(self, key, value): + if key not in self._keys: + raise ValueError(f"Unknown flag {key}. Must be one of {self._keys}") + setattr(self, key, value) + + def __repr__(self): + return f"<Flags(allows_duplicate_labels={self.allows_duplicate_labels})>" + + def __eq__(self, other): + if isinstance(other, type(self)): + return self.allows_duplicate_labels == other.allows_duplicate_labels + return False diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7832547685567..b4c12b9e52f56 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -458,7 +458,9 @@ def __init__( if isinstance(data, BlockManager): if index is None and columns is None and dtype is None and copy is False: # GH#33357 fastpath - NDFrame.__init__(self, data) + NDFrame.__init__( + self, data, + ) return mgr = self._init_mgr( @@ -3659,6 +3661,11 @@ def insert(self, loc, column, value, allow_duplicates=False) -> None: value : int, Series, or array-like allow_duplicates : bool, optional """ + if allow_duplicates and not self.flags.allows_duplicate_labels: + raise ValueError( + "Cannot specify 'allow_duplicates=True' when " + "'self.flags.allows_duplicate_labels' is False."
+ ) self._ensure_valid_index(value) value = self._sanitize_column(column, value, broadcast=False) self._mgr.insert(loc, column, value, allow_duplicates=allow_duplicates) @@ -4559,6 +4566,7 @@ def set_index( 4 16 10 2014 31 """ inplace = validate_bool_kwarg(inplace, "inplace") + self._check_inplace_and_allows_duplicate_labels(inplace) if not isinstance(keys, list): keys = [keys] @@ -4804,6 +4812,1 @@ class max type monkey mammal NaN jump """ inplace = validate_bool_kwarg(inplace, "inplace") + self._check_inplace_and_allows_duplicate_labels(inplace) if inplace: new_obj = self else: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fd924c964c1e1..c9eb4a34683f8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -94,6 +94,7 @@ from pandas.core.base import PandasObject, SelectionMixin import pandas.core.common as com from pandas.core.construction import create_series_with_explicit_dtype +from pandas.core.flags import Flags from pandas.core.indexes.api import Index, MultiIndex, RangeIndex, ensure_index from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.period import Period, PeriodIndex @@ -188,6 +189,7 @@ class NDFrame(PandasObject, SelectionMixin, indexing.IndexingMixin): "_metadata", "__array_struct__", "__array_interface__", + "_flags", ] _internal_names_set: Set[str] = set(_internal_names) _accessors: Set[str] = set() @@ -217,6 +219,7 @@ def __init__( else: attrs = dict(attrs) object.__setattr__(self, "_attrs", attrs) + object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True)) @classmethod def _init_mgr(cls, mgr, axes, dtype=None, copy: bool = False) -> BlockManager: @@ -237,15 +240,20 @@ def _init_mgr(cls, mgr, axes, dtype=None, copy: bool = False) -> BlockManager: return mgr # ---------------------------------------------------------------------- + # attrs and flags @property def attrs(self) -> Dict[Optional[Hashable], Any]: """ - Dictionary of global attributes on this object. + Dictionary of global attributes of this dataset. .. warning:: attrs is experimental and may change without warning. + + See Also + -------- + DataFrame.flags """ if self._attrs is None: self._attrs = {} @@ -255,6 +263,96 @@ def attrs(self, value: Mapping[Optional[Hashable], Any]) -> None: self._attrs = dict(value) + @property + def flags(self) -> Flags: + """ + Get the properties associated with this pandas object. + + The available flags are + + * :attr:`Flags.allows_duplicate_labels` + + See Also + -------- + Flags + DataFrame.attrs + + Notes + ----- + "Flags" differ from "metadata". Flags reflect properties of the + pandas object (the Series or DataFrame). Metadata refer to properties + of the dataset, and should be stored in :attr:`DataFrame.attrs`. + + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2]}) + >>> df.flags + <Flags(allows_duplicate_labels=True)> + + Flags can be get or set using ``.`` + + >>> df.flags.allows_duplicate_labels + True + >>> df.flags.allows_duplicate_labels = False + + Or by slicing with a key + + >>> df.flags["allows_duplicate_labels"] + False + >>> df.flags["allows_duplicate_labels"] = True + """ + return self._flags + + def set_flags( + self: FrameOrSeries, + *, + copy: bool = False, + allows_duplicate_labels: Optional[bool] = None, + ) -> FrameOrSeries: + """ + Return a new object with updated flags. + + Parameters + ---------- + allows_duplicate_labels : bool, optional + Whether the returned object allows duplicate labels.
+ + Returns + ------- + Series or DataFrame + The same type as the caller. + + See Also + -------- + DataFrame.attrs : Global metadata applying to this dataset. + DataFrame.flags : Global flags applying to this object. + + Notes + ----- + This method returns a new object that's a view on the same data + as the input. Mutating the input or the output values will be reflected + in the other. + + This method is intended to be used in method chains. + + "Flags" differ from "metadata". Flags reflect properties of the + pandas object (the Series or DataFrame). Metadata refer to properties + of the dataset, and should be stored in :attr:`DataFrame.attrs`. + + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2]}) + >>> df.flags.allows_duplicate_labels + True + >>> df2 = df.set_flags(allows_duplicate_labels=False) + >>> df2.flags.allows_duplicate_labels + False + """ + df = self.copy(deep=copy) + if allows_duplicate_labels is not None: + df.flags["allows_duplicate_labels"] = allows_duplicate_labels + return df + @classmethod def _validate_dtype(cls, dtype): """ validate the passed dtype """ @@ -557,6 +655,11 @@ def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): -------- %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s. """ + self._check_inplace_and_allows_duplicate_labels(inplace) + return self._set_axis_nocheck(labels, axis, inplace) + + def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool): + # NDFrame.rename with inplace=False calls set_axis(inplace=True) on a copy. if inplace: setattr(self, self._get_axis_name(axis), labels) else: @@ -926,6 +1029,7 @@ def rename( else: index = mapper + self._check_inplace_and_allows_duplicate_labels(inplace) result = self if inplace else self.copy(deep=copy) for axis_no, replacements in enumerate((index, columns)): @@ -950,7 +1054,7 @@ def rename( raise KeyError(f"{missing_labels} not found in axis") new_index = ax._transform_index(f, level) - result.set_axis(new_index, axis=axis_no, inplace=True) + result._set_axis_nocheck(new_index, axis=axis_no, inplace=True) result._clear_item_cache() if inplace: @@ -1828,11 +1932,11 @@ def __getstate__(self) -> Dict[str, Any]: _typ=self._typ, _metadata=self._metadata, attrs=self.attrs, + _flags={k: self.flags[k] for k in self.flags._keys}, **meta, ) def __setstate__(self, state): - if isinstance(state, BlockManager): self._mgr = state elif isinstance(state, dict): @@ -1843,6 +1947,8 @@ def __setstate__(self, state): if typ is not None: attrs = state.get("_attrs", {}) object.__setattr__(self, "_attrs", attrs) + flags = state.get("_flags", dict(allows_duplicate_labels=True)) + object.__setattr__(self, "_flags", Flags(self, **flags)) # set in the order of internal names # to avoid definitional recursion @@ -1850,7 +1956,7 @@ def __setstate__(self, state): # defined meta = set(self._internal_names + self._metadata) for k in list(meta): - if k in state: + if k in state and k != "_flags": v = state[k] object.__setattr__(self, k, v) @@ -3802,6 +3908,13 @@ def __delitem__(self, key) -> None: # ---------------------------------------------------------------------- # Unsorted + def _check_inplace_and_allows_duplicate_labels(self, inplace): + if inplace and not self.flags.allows_duplicate_labels: + raise ValueError( + "Cannot specify 'inplace=True' when " + "'self.flags.allows_duplicate_labels' is False." + ) + def get(self, key, default=None): """ Get item from object for given key (ex: DataFrame column). 
@@ -5163,10 +5276,19 @@ def __finalize__( if isinstance(other, NDFrame): for name in other.attrs: self.attrs[name] = other.attrs[name] + + self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels # For subclasses using _metadata. for name in self._metadata: assert isinstance(name, str) object.__setattr__(self, name, getattr(other, name, None)) + + if method == "concat": + allows_duplicate_labels = all( + x.flags.allows_duplicate_labels for x in other.objs + ) + self.flags.allows_duplicate_labels = allows_duplicate_labels + return self def __getattr__(self, name: str): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 48b02fc525cc1..65b5dfb6df911 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -27,7 +27,7 @@ from pandas._typing import AnyArrayLike, Dtype, DtypeObj, Label from pandas.compat import set_function_name from pandas.compat.numpy import function as nv -from pandas.errors import InvalidIndexError +from pandas.errors import DuplicateLabelError, InvalidIndexError from pandas.util._decorators import Appender, Substitution, cache_readonly, doc from pandas.core.dtypes import concat as _concat @@ -488,6 +488,52 @@ def _simple_new(cls, values, name: Label = None): def _constructor(self): return type(self) + def _maybe_check_unique(self): + """ + Check that an Index has no duplicates. + + This is typically only called via + `NDFrame.flags.allows_duplicate_labels.setter` when it's set to + False (duplicates aren't allowed). + + Raises + ------ + DuplicateLabelError + When the index is not unique. + """ + if not self.is_unique: + msg = """Index has duplicates.""" + duplicates = self._format_duplicate_message() + msg += "\n{}".format(duplicates) + + raise DuplicateLabelError(msg) + + def _format_duplicate_message(self): + """ + Construct the DataFrame for a DuplicateLabelError. + + This returns a DataFrame indicating the labels and positions + of duplicates in an index. This should only be called when it's + already known that duplicates are present.
+ + Examples + -------- + >>> idx = pd.Index(['a', 'b', 'a']) + >>> idx._format_duplicate_message() + positions + label + a [0, 2] + """ + from pandas import Series + + duplicates = self[self.duplicated(keep="first")].unique() + assert len(duplicates) + + out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates] + if self.nlevels == 1: + out = out.rename_axis("label") + return out.to_frame(name="positions") + # -------------------------------------------------------------------- # Index Internals Methods diff --git a/pandas/core/series.py b/pandas/core/series.py index a8a2d300fa168..9d84ce4b9ab2e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -201,7 +201,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): # Constructors def __init__( - self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False + self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False, ): if ( @@ -211,7 +211,9 @@ def __init__( and copy is False ): # GH#33357 called with just the SingleBlockManager - NDFrame.__init__(self, data) + NDFrame.__init__( + self, data, + ) self.name = name return @@ -330,7 +332,9 @@ def __init__( data = SingleBlockManager.from_array(data, index) - generic.NDFrame.__init__(self, data) + generic.NDFrame.__init__( + self, data, + ) self.name = name self._set_axis(0, index, fastpath=True) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 6ac3004d29996..15389ca2c3e61 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -202,6 +202,27 @@ class NumbaUtilError(Exception): """ +class DuplicateLabelError(ValueError): + """ + Error raised when an operation would introduce duplicate labels. + + .. versionadded:: 1.2.0 + + Examples + -------- + >>> s = pd.Series([0, 1, 2], index=['a', 'b', 'c']).set_flags( + ... allows_duplicate_labels=False + ... ) + >>> s.reindex(['a', 'a', 'b']) + Traceback (most recent call last): + ... + DuplicateLabelError: Index has duplicates. + positions + label + a [0, 1] + """ + + class InvalidIndexError(Exception): """ Exception raised when attemping to use an invalid index key. 
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 1d25336cd3b70..54da13c3c620b 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -61,6 +61,7 @@ class TestPDApi(Base): "ExcelFile", "ExcelWriter", "Float64Index", + "Flags", "Grouper", "HDFStore", "Index", diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 78a830c7f43d8..9523fba953ad0 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -99,7 +99,7 @@ def test_ndarray_compat_properties(index_or_series_obj): assert getattr(obj, p, None) is not None # deprecated properties - for p in ["flags", "strides", "itemsize", "base", "data"]: + for p in ["strides", "itemsize", "base", "data"]: assert not hasattr(obj, p) msg = "can only convert an array of size 1 to a Python scalar" @@ -116,6 +116,7 @@ def test_ndarray_compat_properties(index_or_series_obj): @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") def test_memory_usage(index_or_series_obj): obj = index_or_series_obj + res = obj.memory_usage() res_deep = obj.memory_usage(deep=True) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 0716cf5e27119..b1c31a6f90133 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -553,6 +553,33 @@ def test_attrs(self): result = df.rename(columns=str) assert result.attrs == {"version": 1} + @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None]) + def test_set_flags(self, allows_duplicate_labels): + df = pd.DataFrame({"A": [1, 2]}) + result = df.set_flags(allows_duplicate_labels=allows_duplicate_labels) + if allows_duplicate_labels is None: + # We don't update when it's not provided + assert result.flags.allows_duplicate_labels is True + else: + assert result.flags.allows_duplicate_labels is allows_duplicate_labels + + # We made a copy + assert df is not result + + # We didn't mutate df + assert df.flags.allows_duplicate_labels is True + + # But we didn't copy data + result.iloc[0, 0] = 0 + assert df.iloc[0, 0] == 0 + + # Now we do copy. 
+ result = df.set_flags( + copy=True, allows_duplicate_labels=allows_duplicate_labels + ) + result.iloc[0, 0] = 10 + assert df.iloc[0, 0] == 0 + def test_cache_on_copy(self): # GH 31784 _item_cache not cleared on copy causes incorrect reads after updates df = DataFrame({"a": [1]}) diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py new file mode 100644 index 0000000000000..97468e1f10a8b --- /dev/null +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -0,0 +1,450 @@ +"""Tests dealing with the NDFrame.allows_duplicates.""" +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +not_implemented = pytest.mark.xfail(reason="Not implemented.") + +# ---------------------------------------------------------------------------- +# Preservation + + +class TestPreserves: + @pytest.mark.parametrize( + "cls, data", + [ + (pd.Series, np.array([])), + (pd.Series, [1, 2]), + (pd.DataFrame, {}), + (pd.DataFrame, {"A": [1, 2]}), + ], + ) + def test_construction_ok(self, cls, data): + result = cls(data) + assert result.flags.allows_duplicate_labels is True + + result = cls(data).set_flags(allows_duplicate_labels=False) + assert result.flags.allows_duplicate_labels is False + + @pytest.mark.parametrize( + "func", + [ + operator.itemgetter(["a"]), + operator.methodcaller("add", 1), + operator.methodcaller("rename", str.upper), + operator.methodcaller("rename", "name"), + pytest.param(operator.methodcaller("abs"), marks=not_implemented), + # TODO: test np.abs + ], + ) + def test_preserved_series(self, func): + s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + assert func(s).flags.allows_duplicate_labels is False + + @pytest.mark.parametrize( + "other", [pd.Series(0, index=["a", "b", "c"]), pd.Series(0, index=["a", "b"])] + ) + # TODO: frame + @not_implemented + def test_align(self, other): + s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + a, b = s.align(other) + assert a.flags.allows_duplicate_labels is False + assert b.flags.allows_duplicate_labels is False + + def test_preserved_frame(self): + df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) + assert df.loc[["a"]].flags.allows_duplicate_labels is False + assert df.loc[:, ["A", "B"]].flags.allows_duplicate_labels is False + + @not_implemented + def test_to_frame(self): + s = pd.Series(dtype=float).set_flags(allows_duplicate_labels=False) + assert s.to_frame().flags.allows_duplicate_labels is False + + @pytest.mark.parametrize("func", ["add", "sub"]) + @pytest.mark.parametrize( + "frame", [False, pytest.param(True, marks=not_implemented)] + ) + @pytest.mark.parametrize("other", [1, pd.Series([1, 2], name="A")]) + def test_binops(self, func, other, frame): + df = pd.Series([1, 2], name="A", index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) + if frame: + df = df.to_frame() + if isinstance(other, pd.Series) and frame: + other = other.to_frame() + func = operator.methodcaller(func, other) + assert df.flags.allows_duplicate_labels is False + assert func(df).flags.allows_duplicate_labels is False + + @not_implemented + def test_preserve_getitem(self): + df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False) + assert df[["A"]].flags.allows_duplicate_labels is False + assert df["A"].flags.allows_duplicate_labels is False + assert df.loc[0].flags.allows_duplicate_labels is False + assert 
df.loc[[0]].flags.allows_duplicate_labels is False + assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False + + @pytest.mark.xfail(reason="Unclear behavior.") + def test_ndframe_getitem_caching_issue(self): + # NDFrame.__getitem__ will cache the first df['A']. May need to + # invalidate that cache? Update the cached entries? + df = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False) + assert df["A"].flags.allows_duplicate_labels is False + df.flags.allows_duplicate_labels = True + assert df["A"].flags.allows_duplicate_labels is True + + @pytest.mark.parametrize( + "objs, kwargs", + [ + # Series + ( + [ + pd.Series(1, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.Series(2, index=["c", "d"]).set_flags( + allows_duplicate_labels=False + ), + ], + {}, + ), + ( + [ + pd.Series(1, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.Series(2, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + ], + {"ignore_index": True}, + ), + ( + [ + pd.Series(1, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.Series(2, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + ], + {"axis": 1}, + ), + # Frame + ( + [ + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"A": [1, 2]}, index=["c", "d"]).set_flags( + allows_duplicate_labels=False + ), + ], + {}, + ), + ( + [ + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + ], + {"ignore_index": True}, + ), + ( + [ + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"B": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + ], + {"axis": 1}, + ), + # Series / Frame + ( + [ + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.Series([1, 2], index=["a", "b"], name="B",).set_flags( + allows_duplicate_labels=False, + ), + ], + {"axis": 1}, + ), + ], + ) + def test_concat(self, objs, kwargs): + result = pd.concat(objs, **kwargs) + assert result.flags.allows_duplicate_labels is False + + @pytest.mark.parametrize( + "left, right, kwargs, expected", + [ + # false false false + pytest.param( + pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"B": [0, 1]}, index=["a", "d"]).set_flags( + allows_duplicate_labels=False + ), + dict(left_index=True, right_index=True), + False, + marks=not_implemented, + ), + # false true false + pytest.param( + pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ), + pd.DataFrame({"B": [0, 1]}, index=["a", "d"]), + dict(left_index=True, right_index=True), + False, + marks=not_implemented, + ), + # true true true + ( + pd.DataFrame({"A": [0, 1]}, index=["a", "b"]), + pd.DataFrame({"B": [0, 1]}, index=["a", "d"]), + dict(left_index=True, right_index=True), + True, + ), + ], + ) + def test_merge(self, left, right, kwargs, expected): + result = pd.merge(left, right, **kwargs) + assert result.flags.allows_duplicate_labels is expected + + @not_implemented + def test_groupby(self): + # XXX: This is under tested + # TODO: + # - apply + # - transform + # - Should passing a grouper that disallows duplicates propagate? 
+ df = pd.DataFrame({"A": [1, 2, 3]}).set_flags(allows_duplicate_labels=False) + result = df.groupby([0, 0, 1]).agg("count") + assert result.flags.allows_duplicate_labels is False + + @pytest.mark.parametrize("frame", [True, False]) + @not_implemented + def test_window(self, frame): + df = pd.Series( + 1, + index=pd.date_range("2000", periods=12), + name="A", + ).set_flags(allows_duplicate_labels=False) + if frame: + df = df.to_frame() + assert df.rolling(3).mean().flags.allows_duplicate_labels is False + assert df.ewm(3).mean().flags.allows_duplicate_labels is False + assert df.expanding(3).mean().flags.allows_duplicate_labels is False + + +# ---------------------------------------------------------------------------- +# Raises + + +class TestRaises: + @pytest.mark.parametrize( + "cls, axes", + [ + (pd.Series, {"index": ["a", "a"], "dtype": float}), + (pd.DataFrame, {"index": ["a", "a"]}), + (pd.DataFrame, {"index": ["a", "a"], "columns": ["b", "b"]}), + (pd.DataFrame, {"columns": ["b", "b"]}), + ], + ) + def test_set_flags_with_duplicates(self, cls, axes): + result = cls(**axes) + assert result.flags.allows_duplicate_labels is True + + with pytest.raises(pd.errors.DuplicateLabelError): + cls(**axes).set_flags(allows_duplicate_labels=False) + + @pytest.mark.parametrize( + "data", + [ + pd.Series(index=[0, 0], dtype=float), + pd.DataFrame(index=[0, 0]), + pd.DataFrame(columns=[0, 0]), + ], + ) + def test_setting_allows_duplicate_labels_raises(self, data): + with pytest.raises(pd.errors.DuplicateLabelError): + data.flags.allows_duplicate_labels = False + + assert data.flags.allows_duplicate_labels is True + + @pytest.mark.parametrize( + "func", [operator.methodcaller("append", pd.Series(0, index=["a", "b"]))] + ) + def test_series_raises(self, func): + s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + with pytest.raises(pd.errors.DuplicateLabelError): + func(s) + + @pytest.mark.parametrize( + "getter, target", + [ + (operator.itemgetter(["A", "A"]), None), + # loc + (operator.itemgetter(["a", "a"]), "loc"), + pytest.param( + operator.itemgetter(("a", ["A", "A"])), "loc", marks=not_implemented + ), + pytest.param( + operator.itemgetter((["a", "a"], "A")), "loc", marks=not_implemented + ), + # iloc + (operator.itemgetter([0, 0]), "iloc"), + pytest.param( + operator.itemgetter((0, [0, 0])), "iloc", marks=not_implemented + ), + pytest.param( + operator.itemgetter(([0, 0], 0)), "iloc", marks=not_implemented + ), + ], + ) + def test_getitem_raises(self, getter, target): + df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) + if target: + # df, df.loc, or df.iloc + target = getattr(df, target) + else: + target = df + + with pytest.raises(pd.errors.DuplicateLabelError): + getter(target) + + @pytest.mark.parametrize( + "objs, kwargs", + [ + ( + [ + pd.Series(1, index=[0, 1], name="a").set_flags( + allows_duplicate_labels=False + ), + pd.Series(2, index=[0, 1], name="a").set_flags( + allows_duplicate_labels=False + ), + ], + {"axis": 1}, + ) + ], + ) + def test_concat_raises(self, objs, kwargs): + with pytest.raises(pd.errors.DuplicateLabelError): + pd.concat(objs, **kwargs) + + @not_implemented + def test_merge_raises(self): + a = pd.DataFrame({"A": [0, 1, 2]}, index=["a", "b", "c"]).set_flags( + allows_duplicate_labels=False + ) + b = pd.DataFrame({"B": [0, 1, 2]}, index=["a", "b", "b"]) + with pytest.raises(pd.errors.DuplicateLabelError): + pd.merge(a, b, left_index=True, right_index=True) + +
+@pytest.mark.parametrize( + "idx", + [ + pd.Index([1, 1]), + pd.Index(["a", "a"]), + pd.Index([1.1, 1.1]), + pd.PeriodIndex([pd.Period("2000", "D")] * 2), + pd.DatetimeIndex([pd.Timestamp("2000")] * 2), + pd.TimedeltaIndex([pd.Timedelta("1D")] * 2), + pd.CategoricalIndex(["a", "a"]), + pd.IntervalIndex([pd.Interval(0, 1)] * 2), + pd.MultiIndex.from_tuples([("a", 1), ("a", 1)]), + ], + ids=lambda x: type(x).__name__, +) +def test_raises_basic(idx): + with pytest.raises(pd.errors.DuplicateLabelError): + pd.Series(1, index=idx).set_flags(allows_duplicate_labels=False) + + with pytest.raises(pd.errors.DuplicateLabelError): + pd.DataFrame({"A": [1, 1]}, index=idx).set_flags(allows_duplicate_labels=False) + + with pytest.raises(pd.errors.DuplicateLabelError): + pd.DataFrame([[1, 2]], columns=idx).set_flags(allows_duplicate_labels=False) + + +def test_format_duplicate_labels_message(): + idx = pd.Index(["a", "b", "a", "b", "c"]) + result = idx._format_duplicate_message() + expected = pd.DataFrame( + {"positions": [[0, 2], [1, 3]]}, index=pd.Index(["a", "b"], name="label") + ) + tm.assert_frame_equal(result, expected) + + +def test_format_duplicate_labels_message_multi(): + idx = pd.MultiIndex.from_product([["A"], ["a", "b", "a", "b", "c"]]) + result = idx._format_duplicate_message() + expected = pd.DataFrame( + {"positions": [[0, 2], [1, 3]]}, + index=pd.MultiIndex.from_product([["A"], ["a", "b"]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_dataframe_insert_raises(): + df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False) + with pytest.raises(ValueError, match="Cannot specify"): + df.insert(0, "A", [3, 4], allow_duplicates=True) + + +@pytest.mark.parametrize( + "method, frame_only", + [ + (operator.methodcaller("set_index", "A", inplace=True), True), + (operator.methodcaller("set_axis", ["A", "B"], inplace=True), False), + (operator.methodcaller("reset_index", inplace=True), True), + (operator.methodcaller("rename", lambda x: x, inplace=True), False), + ], +) +def test_inplace_raises(method, frame_only): + df = pd.DataFrame({"A": [0, 0], "B": [1, 2]}).set_flags( + allows_duplicate_labels=False + ) + s = df["A"] + s.flags.allows_duplicate_labels = False + msg = "Cannot specify" + + with pytest.raises(ValueError, match=msg): + method(df) + if not frame_only: + with pytest.raises(ValueError, match=msg): + method(s) + + +def test_pickle(): + a = pd.Series([1, 2]).set_flags(allows_duplicate_labels=False) + b = tm.round_trip_pickle(a) + tm.assert_series_equal(a, b) + + a = pd.DataFrame({"A": []}).set_flags(allows_duplicate_labels=False) + b = tm.round_trip_pickle(a) + tm.assert_frame_equal(a, b) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 5e66925a38ec6..23bb673586768 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -887,3 +887,13 @@ def test_axis_numbers_deprecated(self, box): obj = box(dtype=object) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): obj._AXIS_NUMBERS + + @pytest.mark.parametrize("as_frame", [True, False]) + def test_flags_identity(self, as_frame): + s = pd.Series([1, 2]) + if as_frame: + s = s.to_frame() + + assert s.flags is s.flags + s2 = s.copy() + assert s2.flags is not s.flags diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index d81e8a4f82ffb..a69c0ee75eaba 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -524,6 +524,32 @@ def test_attrs(self): result = s + 
1 assert result.attrs == {"version": 1} + @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None]) + def test_set_flags(self, allows_duplicate_labels): + df = pd.Series([1, 2]) + result = df.set_flags(allows_duplicate_labels=allows_duplicate_labels) + if allows_duplicate_labels is None: + # We don't update when it's not provided + assert result.flags.allows_duplicate_labels is True + else: + assert result.flags.allows_duplicate_labels is allows_duplicate_labels + + # We made a copy + assert df is not result + # We didn't mutate df + assert df.flags.allows_duplicate_labels is True + + # But we didn't copy data + result.iloc[0] = 0 + assert df.iloc[0] == 0 + + # Now we do copy. + result = df.set_flags( + copy=True, allows_duplicate_labels=allows_duplicate_labels + ) + result.iloc[0] = 10 + assert df.iloc[0] == 0 + class TestCategoricalSeries: @pytest.mark.parametrize( diff --git a/pandas/tests/test_flags.py b/pandas/tests/test_flags.py new file mode 100644 index 0000000000000..f6e3ae4980afb --- /dev/null +++ b/pandas/tests/test_flags.py @@ -0,0 +1,48 @@ +import pytest + +import pandas as pd + + +class TestFlags: + def test_equality(self): + a = pd.DataFrame().set_flags(allows_duplicate_labels=True).flags + b = pd.DataFrame().set_flags(allows_duplicate_labels=False).flags + + assert a == a + assert b == b + assert a != b + assert a != 2 + + def test_set(self): + df = pd.DataFrame().set_flags(allows_duplicate_labels=True) + a = df.flags + a.allows_duplicate_labels = False + assert a.allows_duplicate_labels is False + a["allows_duplicate_labels"] = True + assert a.allows_duplicate_labels is True + + def test_repr(self): + a = repr(pd.DataFrame({"A": [1]}).set_flags(allows_duplicate_labels=True).flags) + assert a == "<Flags(allows_duplicate_labels=True)>" + a = repr(pd.DataFrame({"A": [1]}).set_flags(allows_duplicate_labels=False).flags) + assert a == "<Flags(allows_duplicate_labels=False)>" + + def test_obj_ref(self): + df = pd.DataFrame() + flags = df.flags + del df + with pytest.raises(ValueError, match="object has been deleted"): + flags.allows_duplicate_labels = True + + def test_getitem(self): + df = pd.DataFrame() + flags = df.flags + assert flags["allows_duplicate_labels"] is True + flags["allows_duplicate_labels"] = False + assert flags["allows_duplicate_labels"] is False + + with pytest.raises(KeyError): + flags["a"] + + with pytest.raises(ValueError): + flags["a"] = 10 diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 3aa3c64923b14..5174ff005b5fb 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -268,3 +268,18 @@ def test_assert_frame_equal_ignore_extension_dtype_mismatch(right_dtype): left = pd.DataFrame({"a": [1, 2, 3]}, dtype="Int64") right = pd.DataFrame({"a": [1, 2, 3]}, dtype=right_dtype) tm.assert_frame_equal(left, right, check_dtype=False) + + +def test_allows_duplicate_labels(): + left = pd.DataFrame() + right = pd.DataFrame().set_flags(allows_duplicate_labels=False) + tm.assert_frame_equal(left, left) + tm.assert_frame_equal(right, right) + tm.assert_frame_equal(left, right, check_flags=False) + tm.assert_frame_equal(right, left, check_flags=False) + + with pytest.raises(AssertionError, match="