From ede0dae3b74a5b9cba11ad6d86fcbc92329f631c Mon Sep 17 00:00:00 2001 From: Gjelt Date: Tue, 27 Nov 2018 00:12:13 +0100 Subject: [PATCH 01/78] DOC: update the DataFrame.reindex_like docstring (#22775) --- pandas/core/frame.py | 90 ++++++++++++-------- pandas/core/generic.py | 184 +++++++++++++++++++++++++++++------------ 2 files changed, 187 insertions(+), 87 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 032d68e2d3e8c1..592825a7ea63b7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3924,22 +3924,36 @@ def shift(self, periods=1, freq=None, axis=0): def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False): """ + Set the DataFrame index using existing columns. + Set the DataFrame index (row labels) using one or more existing - columns. By default yields a new object. + columns. The index can replace the existing index or expand on it. Parameters ---------- - keys : column label or list of column labels / arrays - drop : boolean, default True - Delete columns to be used as the new index - append : boolean, default False - Whether to append columns to existing index - inplace : boolean, default False - Modify the DataFrame in place (do not create a new object) - verify_integrity : boolean, default False + keys : label or list of label + Name or names of the columns that will be used as the index. + drop : bool, default True + Delete columns to be used as the new index. + append : bool, default False + Whether to append columns to existing index. + inplace : bool, default False + Modify the DataFrame in place (do not create a new object). + verify_integrity : bool, default False Check the new index for duplicates. Otherwise defer the check until necessary. Setting to False will improve the performance of this - method + method. + + Returns + ------- + DataFrame + Changed row labels. + + See Also + -------- + DataFrame.reset_index : Opposite of set_index. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. Returns ------- @@ -3949,22 +3963,23 @@ def set_index(self, keys, drop=True, append=False, inplace=False, -------- >>> df = pd.DataFrame({'month': [1, 4, 7, 10], ... 'year': [2012, 2014, 2013, 2014], - ... 'sale':[55, 40, 84, 31]}) - month sale year - 0 1 55 2012 - 1 4 40 2014 - 2 7 84 2013 - 3 10 31 2014 + ... 'sale': [55, 40, 84, 31]}) + >>> df + month year sale + 0 1 2012 55 + 1 4 2014 40 + 2 7 2013 84 + 3 10 2014 31 Set the index to become the 'month' column: >>> df.set_index('month') - sale year + year sale month - 1 55 2012 - 4 40 2014 - 7 84 2013 - 10 31 2014 + 1 2012 55 + 4 2014 40 + 7 2013 84 + 10 2014 31 Create a multi-index using columns 'year' and 'month': @@ -4072,22 +4087,22 @@ def set_index(self, keys, drop=True, append=False, inplace=False, def reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill=''): """ - For DataFrame with multi-level index, return new DataFrame with - labeling information in the columns under the index names, defaulting - to 'level_0', 'level_1', etc. if any are None. For a standard index, - the index name will be used (if set), otherwise a default 'index' or - 'level_0' (if 'index' is already taken) will be used. + Reset the index, or a level of it. + + Reset the index of the DataFrame, and use the default one instead. + If the DataFrame has a MultiIndex, this method can remove one or more + levels. 
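(Editor's sketch, not part of the patch: a minimal round trip between the
two methods documented here, assuming a default RangeIndex.)

>>> df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
>>> df.set_index('a').reset_index()
   a  b
0  1  3
1  2  4

``set_index('a')`` moves column 'a' into the index; ``reset_index`` moves it
back out and restores the default integer index.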
Parameters ---------- level : int, str, tuple, or list, default None Only remove the given levels from the index. Removes all levels by - default - drop : boolean, default False + default. + drop : bool, default False Do not try to insert index into dataframe columns. This resets the index to the default integer index. - inplace : boolean, default False - Modify the DataFrame in place (do not create a new object) + inplace : bool, default False + Modify the DataFrame in place (do not create a new object). col_level : int or str, default 0 If the columns have multiple levels, determines which level the labels are inserted into. By default it is inserted into the first @@ -4098,13 +4113,20 @@ def reset_index(self, level=None, drop=False, inplace=False, col_level=0, Returns ------- - resetted : DataFrame + DataFrame + DataFrame with the new index. + + See Also + -------- + DataFrame.set_index : Opposite of reset_index. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. Examples -------- - >>> df = pd.DataFrame([('bird', 389.0), - ... ('bird', 24.0), - ... ('mammal', 80.5), + >>> df = pd.DataFrame([('bird', 389.0), + ... ('bird', 24.0), + ... ('mammal', 80.5), ... ('mammal', np.nan)], ... index=['falcon', 'parrot', 'lion', 'monkey'], ... columns=('class', 'max_speed')) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3a7016ce396769..e8402e7eefd901 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3423,29 +3423,99 @@ def select(self, crit, axis=0): def reindex_like(self, other, method=None, copy=True, limit=None, tolerance=None): - """Return an object with matching indices to myself. + """ + Return an object with matching indices as other object. + + Conform the object to the same index on all axes. Optional + filling logic, placing NaN in locations having no value + in the previous index. A new object is produced unless the + new index is equivalent to the current one and copy=False. Parameters ---------- - other : Object - method : string or None - copy : boolean, default True + other : Object of the same data type + Its row and column indices are used to define the new indices + of this object. + method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} + Method to use for filling holes in reindexed DataFrame. + Please note: this is only applicable to DataFrames/Series with a + monotonically increasing/decreasing index. + + * None (default): don't fill gaps + * pad / ffill: propagate last valid observation forward to next + valid + * backfill / bfill: use next valid observation to fill gap + * nearest: use nearest valid observations to fill gap + + copy : bool, default True + Return a new object, even if the passed indexes are the same. limit : int, default None Maximum number of consecutive labels to fill for inexact matches. tolerance : optional - Maximum distance between labels of the other object and this - object for inexact matches. Can be list-like. + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations most + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. .. 
versionadded:: 0.21.0 (list-like tolerance) + Returns + ------- + Series or DataFrame + Same type as caller, but with changed indices on each axis. + + See Also + -------- + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex : Change to new indices or expand indices. + Notes ----- - Like calling s.reindex(index=other.index, columns=other.columns, - method=...) + Same as calling + ``.reindex(index=other.index, columns=other.columns,...)``. - Returns - ------- - reindexed : same as input + Examples + -------- + >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'], + ... [31, 87.8, 'high'], + ... [22, 71.6, 'medium'], + ... [35, 95, 'medium']], + ... columns=['temp_celsius', 'temp_fahrenheit', 'windspeed'], + ... index=pd.date_range(start='2014-02-12', + ... end='2014-02-15', freq='D')) + + >>> df1 + temp_celsius temp_fahrenheit windspeed + 2014-02-12 24.3 75.7 high + 2014-02-13 31.0 87.8 high + 2014-02-14 22.0 71.6 medium + 2014-02-15 35.0 95.0 medium + + >>> df2 = pd.DataFrame([[28, 'low'], + ... [30, 'low'], + ... [35.1, 'medium']], + ... columns=['temp_celsius', 'windspeed'], + ... index=pd.DatetimeIndex(['2014-02-12', '2014-02-13', + ... '2014-02-15'])) + + >>> df2 + temp_celsius windspeed + 2014-02-12 28.0 low + 2014-02-13 30.0 low + 2014-02-15 35.1 medium + + >>> df2.reindex_like(df1) + temp_celsius temp_fahrenheit windspeed + 2014-02-12 28.0 NaN low + 2014-02-13 30.0 NaN low + 2014-02-14 NaN NaN NaN + 2014-02-15 35.1 NaN medium """ d = other._construct_axes_dict(axes=self._AXIS_ORDERS, method=method, copy=copy, limit=limit, @@ -3812,36 +3882,36 @@ def reindex(self, *args, **kwargs): Conform %(klass)s to new index with optional filling logic, placing NA/NaN in locations having no value in the previous index. A new object is produced unless the new index is equivalent to the current one and - copy=False + ``copy=False``. Parameters ---------- %(optional_labels)s - %(axes)s : array-like, optional (should be specified using keywords) - New labels / index to conform to. Preferably an Index object to - avoid duplicating data + %(axes)s : array-like, optional + New labels / index to conform to, should be specified using + keywords. Preferably an Index object to avoid duplicating data %(optional_axis)s - method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional - method to use for filling holes in reindexed DataFrame. + method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} + Method to use for filling holes in reindexed DataFrame. Please note: this is only applicable to DataFrames/Series with a monotonically increasing/decreasing index. - * default: don't fill gaps + * None (default): don't fill gaps * pad / ffill: propagate last valid observation forward to next valid * backfill / bfill: use next valid observation to fill gap * nearest: use nearest valid observations to fill gap - copy : boolean, default True - Return a new object, even if the passed indexes are the same + copy : bool, default True + Return a new object, even if the passed indexes are the same. level : int or name Broadcast across a level, matching Index values on the - passed MultiIndex level + passed MultiIndex level. fill_value : scalar, default np.NaN Value to use for missing values. Defaults to NaN, but can be any - "compatible" value + "compatible" value. limit : int, default None - Maximum number of consecutive elements to forward or backward fill + Maximum number of consecutive elements to forward or backward fill. 
tolerance : optional Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations most @@ -3855,6 +3925,12 @@ def reindex(self, *args, **kwargs): .. versionadded:: 0.21.0 (list-like tolerance) + See Also + -------- + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex_like : Change to same indices as other DataFrame. + Examples -------- @@ -3946,12 +4022,12 @@ def reindex(self, *args, **kwargs): ... index=date_index) >>> df2 prices - 2010-01-01 100 - 2010-01-02 101 + 2010-01-01 100.0 + 2010-01-02 101.0 2010-01-03 NaN - 2010-01-04 100 - 2010-01-05 89 - 2010-01-06 88 + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 Suppose we decide to expand the dataframe to cover a wider date range. @@ -3962,12 +4038,12 @@ def reindex(self, *args, **kwargs): 2009-12-29 NaN 2009-12-30 NaN 2009-12-31 NaN - 2010-01-01 100 - 2010-01-02 101 + 2010-01-01 100.0 + 2010-01-02 101.0 2010-01-03 NaN - 2010-01-04 100 - 2010-01-05 89 - 2010-01-06 88 + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 2010-01-07 NaN The index entries that did not have a value in the original data frame @@ -3980,15 +4056,15 @@ def reindex(self, *args, **kwargs): >>> df2.reindex(date_index2, method='bfill') prices - 2009-12-29 100 - 2009-12-30 100 - 2009-12-31 100 - 2010-01-01 100 - 2010-01-02 101 + 2009-12-29 100.0 + 2009-12-30 100.0 + 2009-12-31 100.0 + 2010-01-01 100.0 + 2010-01-02 101.0 2010-01-03 NaN - 2010-01-04 100 - 2010-01-05 89 - 2010-01-06 88 + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 2010-01-07 NaN Please note that the ``NaN`` value present in the original dataframe @@ -4002,7 +4078,7 @@ def reindex(self, *args, **kwargs): Returns ------- - reindexed : %(klass)s + %(klass)s with changed index. """ # TODO: Decide if we care about having different examples for different # kinds @@ -4074,11 +4150,10 @@ def _needs_reindex_multi(self, axes, method, level): def _reindex_multi(self, axes, copy, fill_value): return NotImplemented - _shared_docs[ - 'reindex_axis'] = ("""Conform input object to new index with optional - filling logic, placing NA/NaN in locations having no value in the - previous index. A new object is produced unless the new index is - equivalent to the current one and copy=False + _shared_docs['reindex_axis'] = ("""Conform input object to new index + with optional filling logic, placing NA/NaN in locations having + no value in the previous index. A new object is produced unless + the new index is equivalent to the current one and copy=False. Parameters ---------- @@ -4115,17 +4190,20 @@ def _reindex_multi(self, axes, copy, fill_value): .. versionadded:: 0.21.0 (list-like tolerance) - Examples - -------- - >>> df.reindex_axis(['A', 'B', 'C'], axis=1) - See Also -------- - reindex, reindex_like + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. 
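(Editor's sketch, not part of the patch: the ``tolerance`` rule described
above in action, with illustrative values and a monotonically increasing
index, as ``method`` requires.)

>>> s = pd.Series([1, 2, 3], index=[0, 10, 20])
>>> s.reindex([0, 9, 14], method='nearest', tolerance=2)
0     1.0
9     2.0
14    NaN
dtype: float64

Label 9 matches original label 10 because ``abs(10 - 9) <= 2``; label 14
stays ``NaN`` because its nearest original label, 10, is 4 away.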
Returns ------- - reindexed : %(klass)s + %(klass)s + + Examples + -------- + >>> df.reindex_axis(['A', 'B', 'C'], axis=1) """) @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs) From e5d31abe225edda99b80b0647e77c0579b0995e5 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 27 Nov 2018 02:31:49 +0100 Subject: [PATCH 02/78] DOC: Change code-block to ipython in r_interface.rst (#23906) --- doc/source/r_interface.rst | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/doc/source/r_interface.rst b/doc/source/r_interface.rst index d0b26016680692..f40f9199aaf668 100644 --- a/doc/source/r_interface.rst +++ b/doc/source/r_interface.rst @@ -33,10 +33,11 @@ See also the documentation of the `rpy2 `__ project: In the remainder of this page, a few examples of explicit conversion is given. The pandas conversion of rpy2 needs first to be activated: -.. code-block:: python +.. ipython:: + :verbatim: - >>> from rpy2.robjects import pandas2ri # doctest: +SKIP - >>> pandas2ri.activate() # doctest: +SKIP + In [1]: from rpy2.robjects import pandas2ri + ...: pandas2ri.activate() Transferring R data sets into Python ------------------------------------ @@ -44,11 +45,15 @@ Transferring R data sets into Python Once the pandas conversion is activated (``pandas2ri.activate()``), many conversions of R to pandas objects will be done automatically. For example, to obtain the 'iris' dataset as a pandas DataFrame: -.. code-block:: python +.. ipython:: + :verbatim: - >>> from rpy2.robjects import r # doctest: +SKIP - >>> r.data('iris') # doctest: +SKIP - >>> r['iris'].head() # doctest: +SKIP + In [2]: from rpy2.robjects import r + + In [3]: r.data('iris') + + In [4]: r['iris'].head() + Out[4]: Sepal.Length Sepal.Width Petal.Length Petal.Width Species 0 5.1 3.5 1.4 0.2 setosa 1 4.9 3.0 1.4 0.2 setosa @@ -66,14 +71,19 @@ Converting DataFrames into R objects The ``pandas2ri.py2ri`` function support the reverse operation to convert DataFrames into the equivalent R object (that is, **data.frame**): -.. code-block:: python +.. ipython:: + :verbatim: + + In [5]: df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}, + ...: index=["one", "two", "three"]) + + In [6]: r_dataframe = pandas2ri.py2ri(df) + + In [7]: print(type(r_dataframe)) + Out[7]: - >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}, - ... index=["one", "two", "three"]) # doctest: +SKIP - >>> r_dataframe = pandas2ri.py2ri(df) # doctest: +SKIP - >>> print(type(r_dataframe)) # doctest: +SKIP - - >>> print(r_dataframe) # doctest: +SKIP + In [8]: print(r_dataframe) + Out[8]: A B C one 1 4 7 two 2 5 8 From f8b6496e1ff727aab2fe3ac599f0429c27a47445 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 27 Nov 2018 02:35:21 +0100 Subject: [PATCH 03/78] Fix PEP-8 issues in timedeltas.rst (#23905) Signed-off-by: Fabian Haase --- doc/source/timedeltas.rst | 42 +++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index e602e45784f4a5..8dab39aafbf676 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -4,18 +4,12 @@ .. 
ipython:: python :suppress: - import datetime import numpy as np import pandas as pd + np.random.seed(123456) - randn = np.random.randn - randint = np.random.randint np.set_printoptions(precision=4, suppress=True) - pd.options.display.max_rows=15 - import dateutil - import pytz - from dateutil.relativedelta import relativedelta - from pandas.tseries.offsets import * + pd.options.display.max_rows = 15 .. _timedeltas.timedeltas: @@ -37,6 +31,8 @@ You can construct a ``Timedelta`` scalar through various arguments: .. ipython:: python + import datetime + # strings pd.Timedelta('1 days') pd.Timedelta('1 days 00:00:00') @@ -74,13 +70,14 @@ You can construct a ``Timedelta`` scalar through various arguments: .. ipython:: python - pd.Timedelta(Second(2)) + pd.Timedelta(pd.offsets.Second(2)) Further, operations among the scalars yield another scalar ``Timedelta``. .. ipython:: python - pd.Timedelta(Day(2)) + pd.Timedelta(Second(2)) + pd.Timedelta('00:00:00.000123') + pd.Timedelta(pd.offsets.Day(2)) + pd.Timedelta(pd.offsets.Second(2)) +\ + pd.Timedelta('00:00:00.000123') to_timedelta ~~~~~~~~~~~~ @@ -135,8 +132,8 @@ subtraction operations on ``datetime64[ns]`` Series, or ``Timestamps``. .. ipython:: python s = pd.Series(pd.date_range('2012-1-1', periods=3, freq='D')) - td = pd.Series([ pd.Timedelta(days=i) for i in range(3) ]) - df = pd.DataFrame(dict(A = s, B = td)) + td = pd.Series([pd.Timedelta(days=i) for i in range(3)]) + df = pd.DataFrame({'A': s, 'B': td}) df df['C'] = df['A'] + df['B'] df @@ -145,8 +142,8 @@ subtraction operations on ``datetime64[ns]`` Series, or ``Timestamps``. s - s.max() s - datetime.datetime(2011, 1, 1, 3, 5) s + datetime.timedelta(minutes=5) - s + Minute(5) - s + Minute(5) + Milli(5) + s + pd.offsets.Minute(5) + s + pd.offsets.Minute(5) + pd.offsets.Milli(5) Operations with scalars from a ``timedelta64[ns]`` series: @@ -184,7 +181,7 @@ Operands can also appear in a reversed order (a singular object operated with a A = s - pd.Timestamp('20120101') - pd.Timedelta('00:05:05') B = s - pd.Series(pd.date_range('2012-1-2', periods=3, freq='D')) - df = pd.DataFrame(dict(A=A, B=B)) + df = pd.DataFrame({'A': A, 'B': B}) df df.min() @@ -232,7 +229,8 @@ Numeric reduction operation for ``timedelta64[ns]`` will return ``Timedelta`` ob .. ipython:: python - y2 = pd.Series(pd.to_timedelta(['-1 days +00:00:05', 'nat', '-1 days +00:00:05', '1 days'])) + y2 = pd.Series(pd.to_timedelta(['-1 days +00:00:05', 'nat', + '-1 days +00:00:05', '1 days'])) y2 y2.mean() y2.median() @@ -250,8 +248,10 @@ Note that division by the NumPy scalar is true division, while astyping is equiv .. ipython:: python - td = pd.Series(pd.date_range('20130101', periods=4)) - \ - pd.Series(pd.date_range('20121201', periods=4)) + december = pd.Series(pd.date_range('20121201', periods=4)) + january = pd.Series(pd.date_range('20130101', periods=4)) + td = january - december + td[2] += datetime.timedelta(minutes=5, seconds=3) td[3] = np.nan td @@ -360,8 +360,8 @@ or ``np.timedelta64`` objects. Passing ``np.nan/pd.NaT/nat`` will represent miss .. ipython:: python - pd.TimedeltaIndex(['1 days', '1 days, 00:00:05', - np.timedelta64(2,'D'), datetime.timedelta(days=2,seconds=2)]) + pd.TimedeltaIndex(['1 days', '1 days, 00:00:05', np.timedelta64(2, 'D'), + datetime.timedelta(days=2, seconds=2)]) The string 'infer' can be passed in order to set the frequency of the index as the inferred frequency upon creation: @@ -458,7 +458,7 @@ Similarly to frequency conversion on a ``Series`` above, you can convert these i .. 
ipython:: python - tdi / np.timedelta64(1,'s') + tdi / np.timedelta64(1, 's') tdi.astype('timedelta64[s]') Scalars type ops work as well. These can potentially return a *different* type of index. From c05b73fb7ca79897e220e411cf70a521a1997a6c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 26 Nov 2018 17:43:17 -0800 Subject: [PATCH 04/78] implement deprecation portion of #23675 (#23937) --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/arrays/datetimes.py | 82 ++++++++++++++++++- pandas/core/indexes/datetimes.py | 50 +++++------ pandas/core/tools/datetimes.py | 12 ++- .../indexes/datetimes/test_construction.py | 40 +++++++++ 5 files changed, 157 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 6fd0a224b81b2b..1127d02f0a8221 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1043,6 +1043,7 @@ Deprecations `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`) - :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`) - Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`) +- Constructing a :class:`DatetimeIndex` from data with ``timedelta64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23675`) - The ``keep_tz=False`` option (the default) of the ``keep_tz`` keyword of :meth:`DatetimeIndex.to_series` is deprecated (:issue:`17832`). - Timezone converting a tz-aware ``datetime.datetime`` or :class:`Timestamp` with :class:`Timestamp` and the ``tz`` argument is now deprecated. Instead, use :meth:`Timestamp.tz_convert` (:issue:`23579`) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ae366149ab8999..41c28c5ee3b93b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -15,8 +15,9 @@ from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( - _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_int64_dtype, - is_object_dtype) + _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type, + is_float_dtype, is_int64_dtype, is_object_dtype, is_period_dtype, + is_timedelta64_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna @@ -1421,6 +1422,83 @@ def to_julian_date(self): DatetimeArrayMixin._add_datetimelike_methods() +# ------------------------------------------------------------------- +# Constructor Helpers + +def maybe_infer_tz(tz, inferred_tz): + """ + If a timezone is inferred from data, check that it is compatible with + the user-provided timezone, if any. + + Parameters + ---------- + tz : tzinfo or None + inferred_tz : tzinfo or None + + Returns + ------- + tz : tzinfo or None + + Raises + ------ + TypeError : if both timezones are present but do not match + """ + if tz is None: + tz = inferred_tz + elif inferred_tz is None: + pass + elif not timezones.tz_compare(tz, inferred_tz): + raise TypeError('data is already tz-aware {inferred_tz}, unable to ' + 'set specified tz: {tz}' + .format(inferred_tz=inferred_tz, tz=tz)) + return tz + + +def maybe_convert_dtype(data, copy): + """ + Convert data based on dtype conventions, issuing deprecation warnings + or errors where appropriate. 
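(Editor's sketch, not part of the patch: the user-visible effect of this
helper, mirroring the tests added later in this same patch.)

>>> pd.DatetimeIndex(np.array([0], dtype='m8[ns]'))  # emits FutureWarning
DatetimeIndex(['1970-01-01'], dtype='datetime64[ns]', freq=None)
>>> pd.DatetimeIndex(pd.PeriodIndex(['2016Q1'], freq='Q'))
Traceback (most recent call last):
    ...
TypeError: Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead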
+ Parameters + ---------- + data : np.ndarray or pd.Index + copy : bool + Returns + ------- + data : np.ndarray or pd.Index + copy : bool + Raises + ------ + TypeError : PeriodDType data is passed + """ + if is_float_dtype(data): + # Note: we must cast to datetime64[ns] here in order to treat these + # as wall-times instead of UTC timestamps. + data = data.astype(_NS_DTYPE) + copy = False + # TODO: deprecate this behavior to instead treat symmetrically + # with integer dtypes. See discussion in GH#23675 + + elif is_timedelta64_dtype(data): + warnings.warn("Passing timedelta64-dtype data is deprecated, will " + "raise a TypeError in a future version", + FutureWarning, stacklevel=3) + data = data.view(_NS_DTYPE) + + elif is_period_dtype(data): + # Note: without explicitly raising here, PeriondIndex + # test_setops.test_join_does_not_recur fails + raise TypeError("Passing PeriodDtype data is invalid. " + "Use `data.to_timestamp()` instead") + + elif is_extension_type(data) and not is_datetime64tz_dtype(data): + # Includes categorical + # TODO: We have no tests for these + data = np.array(data, dtype=np.object_) + copy = False + + return data, copy + + def _generate_regular_range(cls, start, end, periods, freq): """ Generate a range of dates with the spans between dates described by diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1ba14ffce383be..72bfefaf331514 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -16,16 +16,17 @@ from pandas.core.dtypes.common import ( _INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_dtype, - is_datetime64_ns_dtype, is_datetimetz, is_dtype_equal, is_float, - is_integer, is_integer_dtype, is_list_like, is_period_dtype, is_scalar, - is_string_like, pandas_dtype) + is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetimetz, + is_dtype_equal, is_float, is_integer, is_integer_dtype, is_list_like, + is_period_dtype, is_scalar, is_string_like, pandas_dtype) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.datetimes import ( - DatetimeArrayMixin as DatetimeArray, _to_m8) + DatetimeArrayMixin as DatetimeArray, _to_m8, maybe_convert_dtype, + maybe_infer_tz) from pandas.core.base import _shared_docs import pandas.core.common as com from pandas.core.indexes.base import Index, _index_shared_docs @@ -246,50 +247,49 @@ def __new__(cls, data=None, name = data.name freq, freq_infer = dtl.maybe_infer_freq(freq) + if freq is None and hasattr(data, "freq"): + # i.e. DatetimeArray/Index + freq = data.freq + verify_integrity = False # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) - if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArray)): - # other iterable of some kind - if not isinstance(data, (list, tuple)): + if not hasattr(data, "dtype"): + # e.g. list, tuple + if np.ndim(data) == 0: + # i.e. 
generator data = list(data) - data = np.asarray(data, dtype='O') + data = np.asarray(data) + copy = False elif isinstance(data, ABCSeries): data = data._values - # data must be Index or np.ndarray here + # By this point we are assured to have either a numpy array or Index + data, copy = maybe_convert_dtype(data, copy) + if not (is_datetime64_dtype(data) or is_datetimetz(data) or is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'): data = tools.to_datetime(data, dayfirst=dayfirst, yearfirst=yearfirst) - if isinstance(data, DatetimeArray): - if tz is None: - tz = data.tz - elif data.tz is None: - data = data.tz_localize(tz, ambiguous=ambiguous) - else: - # the tz's must match - if not timezones.tz_compare(tz, data.tz): - msg = ('data is already tz-aware {0}, unable to ' - 'set specified tz: {1}') - raise TypeError(msg.format(data.tz, tz)) - + if is_datetime64tz_dtype(data): + tz = maybe_infer_tz(tz, data.tz) subarr = data._data - if freq is None: - freq = data.freq - verify_integrity = False - elif issubclass(data.dtype.type, np.datetime64): + elif is_datetime64_dtype(data): + # tz-naive DatetimeArray/Index or ndarray[datetime64] + data = getattr(data, "_data", data) if data.dtype != _NS_DTYPE: data = conversion.ensure_datetime64ns(data) + if tz is not None: # Convert tz-naive to UTC tz = timezones.maybe_get_tz(tz) data = conversion.tz_localize_to_utc(data.view('i8'), tz, ambiguous=ambiguous) subarr = data.view(_NS_DTYPE) + else: # must be integer dtype otherwise # assume this data are epoch timestamps diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 2c6fdb3eaf03c5..ee44a64514f4f1 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -171,6 +171,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, - ndarray of Timestamps if box=False """ from pandas import DatetimeIndex + from pandas.core.arrays.datetimes import maybe_convert_dtype + if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') @@ -208,6 +210,11 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') + # warn if passing timedelta64, raise for PeriodDtype + # NB: this must come after unit transformation + orig_arg = arg + arg, _ = maybe_convert_dtype(arg, copy=False) + arg = ensure_object(arg) require_iso8601 = False @@ -231,7 +238,10 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, # shortcut formatting here if format == '%Y%m%d': try: - result = _attempt_YYYYMMDD(arg, errors=errors) + # pass orig_arg as float-dtype may have been converted to + # datetime64[ns] + orig_arg = ensure_object(orig_arg) + result = _attempt_YYYYMMDD(orig_arg, errors=errors) except (ValueError, TypeError, tslibs.OutOfBoundsDatetime): raise ValueError("cannot convert the input to " "'%Y%m%d' date format") diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 02755c7e58a1db..6651a27098630d 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -14,11 +14,51 @@ from pandas import ( DatetimeIndex, Index, Timestamp, date_range, datetime, offsets, to_datetime) +from pandas.core.arrays import period_array import pandas.util.testing as tm class TestDatetimeIndex(object): + def test_dti_with_period_data_raises(self): + # GH#23675 + data = pd.PeriodIndex(['2016Q1', '2016Q2'], freq='Q') + + with 
pytest.raises(TypeError, match="PeriodDtype data is invalid"): + DatetimeIndex(data) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + to_datetime(data) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + DatetimeIndex(period_array(data)) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + to_datetime(period_array(data)) + + def test_dti_with_timedelta64_data_deprecation(self): + # GH#23675 + data = np.array([0], dtype='m8[ns]') + with tm.assert_produces_warning(FutureWarning): + result = DatetimeIndex(data) + + assert result[0] == Timestamp('1970-01-01') + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = to_datetime(data) + + assert result[0] == Timestamp('1970-01-01') + + with tm.assert_produces_warning(FutureWarning): + result = DatetimeIndex(pd.TimedeltaIndex(data)) + + assert result[0] == Timestamp('1970-01-01') + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = to_datetime(pd.TimedeltaIndex(data)) + + assert result[0] == Timestamp('1970-01-01') + def test_construction_caching(self): df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3), From 11e2cb77534c8dac6dc6968dc5223baef94164b6 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Tue, 27 Nov 2018 01:50:20 +0000 Subject: [PATCH 05/78] Increase timeout for flakey test (#23849) --- pandas/tests/frame/test_apply.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index c43872bfc3ddb1..a3b72d223f9573 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -11,7 +11,7 @@ import warnings import numpy as np -from hypothesis import given +from hypothesis import given, settings from hypothesis.strategies import composite, dates, integers, sampled_from from pandas import (notna, DataFrame, Series, MultiIndex, date_range, @@ -1157,6 +1157,7 @@ def test_agg_cython_table_raises(self, df, func, expected, axis): df.agg(func, axis=axis) @given(index=indices(max_length=5), num_columns=integers(0, 5)) + @settings(deadline=1000) def test_frequency_is_original(self, index, num_columns): # GH 22150 original = index.copy() From feb9c6e37ea83ce4427b7748adde56a3481bcdf9 Mon Sep 17 00:00:00 2001 From: Kimi Li Date: Mon, 26 Nov 2018 21:24:45 -0500 Subject: [PATCH 06/78] DOC: Move content in doc/README.rst to doc/source/contributing.rst (#23840) --- doc/README.rst | 174 +------------------------------------------------ 1 file changed, 1 insertion(+), 173 deletions(-) diff --git a/doc/README.rst b/doc/README.rst index 12950d323f5d34..a11ed8d9d03e30 100644 --- a/doc/README.rst +++ b/doc/README.rst @@ -1,173 +1 @@ -.. _contributing.docs: - -Contributing to the documentation -================================= - -Whether you are someone who loves writing, teaching, or development, -contributing to the documentation is a huge value. If you don't see yourself -as a developer type, please don't stress and know that we want you to -contribute. You don't even have to be an expert on *pandas* to do so! -Something as simple as rewriting small passages for clarity -as you reference the docs is a simple but effective way to contribute. The -next person to read that passage will be in your debt! - -Actually, there are sections of the docs that are worse off by being written -by experts. 
If something in the docs doesn't make sense to you, updating the -relevant section after you figure it out is a simple way to ensure it will -help the next person. - -.. contents:: Table of contents: - :local: - - -About the pandas documentation ------------------------------- - -The documentation is written in **reStructuredText**, which is almost like writing -in plain English, and built using `Sphinx `__. The -Sphinx Documentation has an excellent `introduction to reST -`__. Review the Sphinx docs to perform more -complex changes to the documentation as well. - -Some other important things to know about the docs: - -- The pandas documentation consists of two parts: the docstrings in the code - itself and the docs in this folder ``pandas/doc/``. - - The docstrings provide a clear explanation of the usage of the individual - functions, while the documentation in this folder consists of tutorial-like - overviews per topic together with some other information (what's new, - installation, etc). - -- The docstrings follow the **Numpy Docstring Standard** which is used widely - in the Scientific Python community. This standard specifies the format of - the different sections of the docstring. See `this document - `_ - for a detailed explanation, or look at some of the existing functions to - extend it in a similar manner. - -- The tutorials make heavy use of the `ipython directive - `_ sphinx extension. - This directive lets you put code in the documentation which will be run - during the doc build. For example: - - :: - - .. ipython:: python - - x = 2 - x**3 - - will be rendered as - - :: - - In [1]: x = 2 - - In [2]: x**3 - Out[2]: 8 - - This means that almost all code examples in the docs are always run (and the - output saved) during the doc build. This way, they will always be up to date, - but it makes the doc building a bit more complex. - - -How to build the pandas documentation -------------------------------------- - -Requirements -^^^^^^^^^^^^ - -To build the pandas docs there are some extra requirements: you will need to -have ``sphinx`` and ``ipython`` installed. `numpydoc -`_ is used to parse the docstrings that -follow the Numpy Docstring Standard (see above), but you don't need to install -this because a local copy of ``numpydoc`` is included in the pandas source -code. `nbsphinx `_ is used to convert -Jupyter notebooks. You will need to install it if you intend to modify any of -the notebooks included in the documentation. - -Furthermore, it is recommended to have all `optional dependencies -`_ -installed. This is not needed, but be aware that you will see some error -messages. Because all the code in the documentation is executed during the doc -build, the examples using this optional dependencies will generate errors. -Run ``pd.show_versions()`` to get an overview of the installed version of all -dependencies. - -.. warning:: - - Sphinx version >= 1.2.2 or the older 1.1.3 is required. - -Building pandas -^^^^^^^^^^^^^^^ - -For a step-by-step overview on how to set up your environment, to work with -the pandas code and git, see `the developer pages -`_. -When you start to work on some docs, be sure to update your code to the latest -development version ('master'):: - - git fetch upstream - git rebase upstream/master - -Often it will be necessary to rebuild the C extension after updating:: - - python setup.py build_ext --inplace - -Building the documentation -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -So how do you build the docs? 
Navigate to your local folder -``pandas/doc/`` directory in the console and run:: - - python make.py html - -And then you can find the html output in the folder ``pandas/doc/build/html/``. - -The first time it will take quite a while, because it has to run all the code -examples in the documentation and build all generated docstring pages. -In subsequent evocations, sphinx will try to only build the pages that have -been modified. - -If you want to do a full clean build, do:: - - python make.py clean - python make.py build - - -Starting with 0.13.1 you can tell ``make.py`` to compile only a single section -of the docs, greatly reducing the turn-around time for checking your changes. -You will be prompted to delete `.rst` files that aren't required, since the -last committed version can always be restored from git. - -:: - - #omit autosummary and API section - python make.py clean - python make.py --no-api - - # compile the docs with only a single - # section, that which is in indexing.rst - python make.py clean - python make.py --single indexing - -For comparison, a full doc build may take 10 minutes. a ``-no-api`` build -may take 3 minutes and a single section may take 15 seconds. - -Where to start? ---------------- - -There are a number of issues listed under `Docs -`_ -and `good first issue -`_ -where you could start out. - -Or maybe you have an idea of your own, by using pandas, looking for something -in the documentation and thinking 'this can be improved', let's do something -about that! - -Feel free to ask questions on `mailing list -`_ or submit an -issue on Github. +See `contributing.rst `_ in this repo. From 5e0c392aabbbde49604bf58ed216bcc61b6b5794 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 27 Nov 2018 02:37:14 +0000 Subject: [PATCH 07/78] CLN: io/formats/html.py: refactor (#23818) --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/io/formats/format.py | 8 ++----- pandas/io/formats/html.py | 37 ++++++++++++++------------------- 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 1127d02f0a8221..0a066399e27ca8 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -303,6 +303,7 @@ Backwards incompatible API changes - A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`) - :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`) - Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`) +- ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`) .. _whatsnew_0240.api_breaking.deps: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 4c08ee89c33dfc..ff9ee9ed7296a5 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -730,12 +730,8 @@ def to_html(self, classes=None, notebook=False, border=None): .. 
versionadded:: 0.19.0 """ from pandas.io.formats.html import HTMLFormatter - html_renderer = HTMLFormatter(self, classes=classes, - max_rows=self.max_rows, - max_cols=self.max_cols, - notebook=notebook, - border=border, - table_id=self.table_id) + html_renderer = HTMLFormatter(self, classes=classes, notebook=notebook, + border=border, table_id=self.table_id) if hasattr(self.buf, 'write'): html_renderer.write_result(self.buf) elif isinstance(self.buf, compat.string_types): diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index bc2de210df3f48..bf92ce7ee0f679 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -25,8 +25,8 @@ class HTMLFormatter(TableFormatter): indent_delta = 2 - def __init__(self, formatter, classes=None, max_rows=None, max_cols=None, - notebook=False, border=None, table_id=None): + def __init__(self, formatter, classes=None, notebook=False, border=None, + table_id=None): self.fmt = formatter self.classes = classes @@ -35,18 +35,21 @@ def __init__(self, formatter, classes=None, max_rows=None, max_cols=None, self.elements = [] self.bold_rows = self.fmt.kwds.get('bold_rows', False) self.escape = self.fmt.kwds.get('escape', True) - - self.max_rows = max_rows or len(self.fmt.frame) - self.max_cols = max_cols or len(self.fmt.columns) self.show_dimensions = self.fmt.show_dimensions - self.is_truncated = (self.max_rows < len(self.fmt.frame) or - self.max_cols < len(self.fmt.columns)) self.notebook = notebook if border is None: border = get_option('display.html.border') self.border = border self.table_id = table_id + @property + def is_truncated(self): + return self.fmt.is_truncated + + @property + def ncols(self): + return len(self.fmt.tr_frame.columns) + def write(self, s, indent=0): rs = pprint_thing(s) self.elements.append(' ' * indent + rs) @@ -301,12 +304,8 @@ def _write_header(self, indent): if all((self.fmt.has_index_names, self.fmt.index, self.fmt.show_index_names)): - row = ([x if x is not None else '' - for x in self.frame.index.names] + - [''] * min(len(self.columns), self.max_cols)) - if truncate_h: - ins_col = row_levels + self.fmt.tr_col_num - row.insert(ins_col, '') + row = ([x if x is not None else '' for x in self.frame.index.names] + + [''] * (self.ncols + (1 if truncate_h else 0))) self.write_tr(row, indent, self.indent_delta, header=True) indent -= self.indent_delta @@ -318,9 +317,7 @@ def _write_body(self, indent): self.write('', indent) indent += self.indent_delta - fmt_values = {} - for i in range(min(len(self.columns), self.max_cols)): - fmt_values[i] = self.fmt._format_col(i) + fmt_values = {i: self.fmt._format_col(i) for i in range(self.ncols)} # write values if self.fmt.index and isinstance(self.frame.index, ABCMultiIndex): @@ -338,7 +335,6 @@ def _write_regular_rows(self, fmt_values, indent): truncate_h = self.fmt.truncate_h truncate_v = self.fmt.truncate_v - ncols = len(self.fmt.tr_frame.columns) nrows = len(self.fmt.tr_frame) if self.fmt.index: @@ -362,7 +358,7 @@ def _write_regular_rows(self, fmt_values, indent): row = [] if self.fmt.index: row.append(index_values[i]) - row.extend(fmt_values[j][i] for j in range(ncols)) + row.extend(fmt_values[j][i] for j in range(self.ncols)) if truncate_h: dot_col_ix = self.fmt.tr_col_num + row_levels @@ -376,7 +372,6 @@ def _write_hierarchical_rows(self, fmt_values, indent): truncate_h = self.fmt.truncate_h truncate_v = self.fmt.truncate_v frame = self.fmt.tr_frame - ncols = len(frame.columns) nrows = len(frame) row_levels = self.frame.index.nlevels @@ -454,7 +449,7 @@ def 
_write_hierarchical_rows(self, fmt_values, indent): j += 1 row.append(v) - row.extend(fmt_values[j][i] for j in range(ncols)) + row.extend(fmt_values[j][i] for j in range(self.ncols)) if truncate_h: row.insert(row_levels - sparse_offset + self.fmt.tr_col_num, '...') @@ -466,7 +461,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): sparsify=False, adjoin=False, names=False))) row = [] row.extend(idx_values[i]) - row.extend(fmt_values[j][i] for j in range(ncols)) + row.extend(fmt_values[j][i] for j in range(self.ncols)) if truncate_h: row.insert(row_levels + self.fmt.tr_col_num, '...') self.write_tr(row, indent, self.indent_delta, tags=None, From 6af9c30023df41cb333b08032f54d8b1fda4f922 Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Tue, 27 Nov 2018 03:50:22 +0100 Subject: [PATCH 08/78] CLN/DEPS: Clean up post numpy bump to 1.12 (#23796) --- README.md | 2 +- ci/azure/linux.yml | 2 +- pandas/_libs/algos_rank_helper.pxi.in | 10 +------ pandas/_libs/lib.pyx | 4 +-- pandas/_libs/sparse.pyx | 8 ------ pandas/_libs/sparse_op_helper.pxi.in | 7 ----- pandas/core/arrays/categorical.py | 12 ++------ pandas/core/arrays/datetimelike.py | 3 +- pandas/core/arrays/sparse.py | 3 -- pandas/core/dtypes/cast.py | 26 +++-------------- pandas/core/dtypes/dtypes.py | 7 +---- pandas/core/indexes/datetimes.py | 5 ---- pandas/core/internals/blocks.py | 22 ++------------- pandas/core/internals/managers.py | 28 +++---------------- pandas/core/reshape/tile.py | 3 +- pandas/io/packers.py | 2 +- pandas/io/pickle.py | 8 ------ pandas/plotting/_misc.py | 3 -- pandas/tests/arrays/categorical/test_repr.py | 1 - pandas/tests/dtypes/test_inference.py | 5 +--- pandas/tests/frame/test_constructors.py | 8 +++--- pandas/tests/frame/test_operators.py | 1 - pandas/tests/indexes/common.py | 5 +--- pandas/tests/indexes/multi/test_analytics.py | 5 +--- .../indexing/test_chaining_and_caching.py | 1 - pandas/tests/indexing/test_iloc.py | 2 +- pandas/tests/io/test_pytables.py | 2 -- pandas/tests/plotting/test_datetimelike.py | 1 - pandas/tests/plotting/test_frame.py | 13 +++------ pandas/tests/plotting/test_series.py | 4 +-- pandas/tests/series/test_analytics.py | 10 ++----- pandas/tests/test_base.py | 9 +++--- pandas/tests/test_nanops.py | 1 - pandas/tests/test_panel.py | 1 - pandas/tests/test_sorting.py | 7 ----- 35 files changed, 40 insertions(+), 191 deletions(-) diff --git a/README.md b/README.md index b4dedecb4c6971..1993b1ecb9dc1b 100644 --- a/README.md +++ b/README.md @@ -171,7 +171,7 @@ pip install pandas ``` ## Dependencies -- [NumPy](https://www.numpy.org): 1.9.0 or higher +- [NumPy](https://www.numpy.org): 1.12.0 or higher - [python-dateutil](https://labix.org/python-dateutil): 2.5.0 or higher - [pytz](https://pythonhosted.org/pytz): 2011k or higher diff --git a/ci/azure/linux.yml b/ci/azure/linux.yml index a773a06c193d45..7fa8a9a1783f90 100644 --- a/ci/azure/linux.yml +++ b/ci/azure/linux.yml @@ -9,7 +9,7 @@ jobs: strategy: maxParallel: 11 matrix: - py27_np_19: + py27_np_120: ENV_FILE: ci/deps/azure-27-compat.yaml CONDA_PY: "27" TEST_ARGS: "--skip-slow --skip-network" diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index 5ffc6dd5780235..5dac94394c7ed0 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -102,15 +102,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ranks = np.empty(n, dtype='f8') {{if dtype == 'object'}} - - try: - _as = np.lexsort(keys=order) - 
except TypeError: - # lexsort on object array will raise TypeError for numpy version - # earlier than 1.11.0. Use argsort with order argument instead. - _dt = [('values', 'O'), ('mask', '?')] - _values = np.asarray(list(zip(order[0], order[1])), dtype=_dt) - _as = np.argsort(_values, kind='mergesort', order=('mask', 'values')) + _as = np.lexsort(keys=order) {{else}} if tiebreak == TIEBREAK_FIRST: # need to use a stable sort here diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index ad538ff103c2f3..874206378f79cd 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -518,9 +518,7 @@ def astype_intsafe(ndarray[object] arr, new_dtype): bint is_datelike ndarray result - # on 32-bit, 1.6.2 numpy M8[ns] is a subdtype of integer, which is weird - is_datelike = new_dtype in ['M8[ns]', 'm8[ns]'] - + is_datelike = new_dtype == 'm8[ns]' result = np.empty(n, dtype=new_dtype) for i in range(n): val = arr[i] diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 668bd0ae6bbb75..f5980998f6db47 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -8,14 +8,6 @@ from numpy cimport (ndarray, uint8_t, int64_t, int32_t, int16_t, int8_t, cnp.import_array() -from distutils.version import LooseVersion - -# numpy versioning -_np_version = np.version.short_version -_np_version_under1p10 = LooseVersion(_np_version) < LooseVersion('1.10') -_np_version_under1p11 = LooseVersion(_np_version) < LooseVersion('1.11') - - # ----------------------------------------------------------------------------- # Preamble stuff diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index 1f41096a3f1942..c6621ab5977caf 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -42,13 +42,6 @@ cdef inline sparse_t __mod__(sparse_t a, sparse_t b): cdef inline sparse_t __floordiv__(sparse_t a, sparse_t b): if b == 0: if sparse_t is float64_t: - # numpy >= 1.11 returns NaN - # for a // 0, rather than +-inf - if _np_version_under1p11: - if a > 0: - return INF - elif a < 0: - return -INF return NaN else: return 0 diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6dc3a960dc817b..24df4c7a9f3798 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -98,7 +98,7 @@ def f(self, other): ret[na_mask] = False return ret - # Numpy-1.9 and earlier may convert a scalar to a zerodim array during + # Numpy < 1.13 may convert a scalar to a zerodim array during # comparison operation when second arg has higher priority, e.g. # # cat[0] < cat @@ -2038,15 +2038,7 @@ def __setitem__(self, key, value): elif isinstance(key, slice): pass - # Array of True/False in Series or Categorical - else: - # There is a bug in numpy, which does not accept a Series as a - # indexer - # https://github.com/pandas-dev/pandas/issues/6168 - # https://github.com/numpy/numpy/issues/4240 -> fixed in numpy 1.9 - # FIXME: remove when numpy 1.9 is the lowest numpy version pandas - # accepts... 
- key = np.asarray(key) + # else: array of True/False in Series or Categorical lindexer = self.categories.get_indexer(rvalue) lindexer = self._maybe_coerce_indexer(lindexer) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4e784d9c89c5f1..83ee335aa54657 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -835,8 +835,7 @@ def __isub__(self, other): def _evaluate_compare(self, other, op): """ We have been called because a comparison between - 8 aware arrays. numpy >= 1.11 will - now warn about NaT comparisons + 8 aware arrays. numpy will warn about NaT comparisons """ # Called by comparison methods when comparing datetimelike # with datetimelike diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 24e33c32d08986..9a5ef3b3a7dd0e 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -1015,9 +1015,6 @@ def __getitem__(self, key): key = np.asarray(key) if com.is_bool_indexer(key) and len(self) == len(key): - # TODO(numpy 1.11): Remove this asarray. - # Old NumPy didn't treat array-like as boolean masks. - key = np.asarray(key) return self.take(np.arange(len(key), dtype=np.int32)[key]) elif hasattr(key, '__len__'): return self.take(key) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3c5f8830441f71..afe6ba45bb4005 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -263,28 +263,10 @@ def maybe_promote(dtype, fill_value=np.nan): fill_value = np.nan # returns tuple of (dtype, fill_value) - if issubclass(dtype.type, (np.datetime64, np.timedelta64)): - # for now: refuse to upcast datetime64 - # (this is because datetime64 will not implicitly upconvert - # to object correctly as of numpy 1.6.1) - if isna(fill_value): - fill_value = iNaT - else: - if issubclass(dtype.type, np.datetime64): - try: - fill_value = tslibs.Timestamp(fill_value).value - except Exception: - # the proper thing to do here would probably be to upcast - # to object (but numpy 1.6.1 doesn't do this properly) - fill_value = iNaT - elif issubclass(dtype.type, np.timedelta64): - try: - fill_value = tslibs.Timedelta(fill_value).value - except Exception: - # as for datetimes, cannot upcast to object - fill_value = iNaT - else: - fill_value = iNaT + if issubclass(dtype.type, np.datetime64): + fill_value = tslibs.Timestamp(fill_value).value + elif issubclass(dtype.type, np.timedelta64): + fill_value = tslibs.Timedelta(fill_value).value elif is_datetimetz(dtype): if isna(fill_value): fill_value = iNaT diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index bd7c6630c7c5df..fee983f9692216 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -338,12 +338,7 @@ def _hash_categories(categories, ordered=True): cat_array = [cat_array] hashed = _combine_hash_arrays(iter(cat_array), num_items=len(cat_array)) - if len(hashed) == 0: - # bug in Numpy<1.12 for length 0 arrays. 
Just return the correct - # value of 0 - return 0 - else: - return np.bitwise_xor.reduce(hashed) + return np.bitwise_xor.reduce(hashed) @classmethod def construct_array_type(cls): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 72bfefaf331514..61e8d6344a0e9e 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -415,11 +415,6 @@ def __setstate__(self, state): self._freq = own_state[1] self._tz = timezones.tz_standardize(own_state[2]) - # provide numpy < 1.7 compat - if nd_state[2] == 'M8[us]': - new_state = np.ndarray.__reduce__(data.astype('M8[ns]')) - np.ndarray.__setstate__(data, new_state[2]) - else: # pragma: no cover data = np.empty(state) np.ndarray.__setstate__(data, state) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 857bf18c5982be..21dae455cac1b5 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1458,11 +1458,6 @@ def quantile(self, qs, interpolation='linear', axis=0, axes=None): def _nanpercentile1D(values, mask, q, **kw): # mask is Union[ExtensionArray, ndarray] - # we convert to an ndarray for NumPy 1.9 compat, which didn't - # treat boolean-like arrays as boolean. This conversion would have - # been done inside ndarray.__getitem__ anyway, since values is - # an ndarray at this point. - mask = np.asarray(mask) values = values[~mask] if len(values) == 0: @@ -2781,9 +2776,7 @@ def set(self, locs, values, check=False): ------- None """ - if values.dtype != _NS_DTYPE: - # Workaround for numpy 1.6 bug - values = conversion.ensure_datetime64ns(values) + values = conversion.ensure_datetime64ns(values, copy=False) self.values[locs] = values @@ -3102,7 +3095,7 @@ def _merge_blocks(blocks, dtype=None, _can_consolidate=True): # FIXME: optimization potential in case all mgrs contain slices and # combination of those slices is a slice, too. 
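    # Editor's note (not part of the patch): the lines below stack every
    # block's values and then reorder the stacked rows by the sorted manager
    # locations, so the merged block is laid out in manager order. With
    # numpy >= 1.12 now required, np.vstack handles datetime64/timedelta64
    # directly, which is why the _vstack i8-view workaround removed further
    # down is no longer needed.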
new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks]) - new_values = _vstack([b.values for b in blocks], dtype) + new_values = np.vstack([b.values for b in blocks]) argsort = np.argsort(new_mgr_locs) new_values = new_values[argsort] @@ -3114,17 +3107,6 @@ def _merge_blocks(blocks, dtype=None, _can_consolidate=True): return blocks -def _vstack(to_stack, dtype): - - # work around NumPy 1.6 bug - if dtype == _NS_DTYPE or dtype == _TD_DTYPE: - new_values = np.vstack([x.view('i8') for x in to_stack]) - return new_values.view(dtype) - - else: - return np.vstack(to_stack) - - def _block2d_to_blocknd(values, placement, shape, labels, ref_items): """ pivot to the labels shape """ panel_shape = (len(placement),) + shape diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c3762d98191533..5f9860ce98b111 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -248,9 +248,6 @@ def __getstate__(self): def __setstate__(self, state): def unpickle_block(values, mgr_locs): - # numpy < 1.7 pickle compat - if values.dtype == 'M8[us]': - values = values.astype('M8[ns]') return make_block(values, placement=mgr_locs) if (isinstance(state, tuple) and len(state) >= 4 and @@ -776,18 +773,6 @@ def _interleave(self): result = np.empty(self.shape, dtype=dtype) - if result.shape[0] == 0: - # Workaround for numpy 1.7 bug: - # - # >>> a = np.empty((0,10)) - # >>> a[slice(0,0)] - # array([], shape=(0, 10), dtype=float64) - # >>> a[[]] - # Traceback (most recent call last): - # File "", line 1, in - # IndexError: index 0 is out of bounds for axis 0 with size 0 - return result - itemmask = np.zeros(self.shape[0]) for blk in self.blocks: @@ -1170,8 +1155,7 @@ def insert(self, loc, item, value, allow_duplicates=False): blk.mgr_locs = new_mgr_locs if loc == self._blklocs.shape[0]: - # np.append is a lot faster (at least in numpy 1.7.1), let's use it - # if we can. + # np.append is a lot faster, let's use it if we can. self._blklocs = np.append(self._blklocs, 0) self._blknos = np.append(self._blknos, len(self.blocks)) else: @@ -1995,13 +1979,9 @@ def _transform_index(index, func, level=None): def _fast_count_smallints(arr): """Faster version of set(arr) for sequences of small numbers.""" - if len(arr) == 0: - # Handle empty arr case separately: numpy 1.6 chokes on that. 
- return np.empty((0, 2), dtype=arr.dtype) - else: - counts = np.bincount(arr.astype(np.int_)) - nz = counts.nonzero()[0] - return np.c_[nz, counts[nz]] + counts = np.bincount(arr.astype(np.int_)) + nz = counts.nonzero()[0] + return np.c_[nz, counts[nz]] def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill): diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 4a863372eea137..8ad2a48e8767c4 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -334,8 +334,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, ids = ensure_int64(bins.searchsorted(x, side=side)) if include_lowest: - # Numpy 1.9 support: ensure this mask is a Numpy array - ids[np.asarray(x == bins[0])] = 1 + ids[x == bins[0]] = 1 na_mask = isna(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() diff --git a/pandas/io/packers.py b/pandas/io/packers.py index cec7908f143a8f..7fc770efbeb377 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -250,7 +250,7 @@ def dtype_for(t): 'complex128': np.float64, 'complex64': np.float32} -# numpy 1.6.1 compat +# windows (32 bit) compat if hasattr(np, 'float128'): c2f_dict['complex256'] = np.float128 diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index c89d1df8ee64ba..789f55a62dc58c 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -6,8 +6,6 @@ from pandas.compat import PY3, BytesIO, cPickle as pkl, pickle_compat as pc -from pandas.core.dtypes.common import _NS_DTYPE, is_datetime64_dtype - from pandas.io.common import _get_handle, _stringify_path @@ -200,10 +198,4 @@ def _pickle_array(arr): def _unpickle_array(bytes): arr = read_array(BytesIO(bytes)) - # All datetimes should be stored as M8[ns]. When unpickling with - # numpy1.6, it will read these as M8[us]. 
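# A small sketch of the datetime64 unit normalization that the removed
# workaround used to perform by hand: microsecond-resolution values from
# old pickles become the 'M8[ns]' unit pandas stores internally.
import numpy as np

arr_us = np.array(['2018-11-27T00:12:13'], dtype='M8[us]')
arr_ns = arr_us.astype('M8[ns]')   # same instants, nanosecond resolution
assert arr_ns.dtype == np.dtype('M8[ns]')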
So this ensures all - # datetime64 types are read as MS[ns] - if is_datetime64_dtype(arr): - arr = arr.view(_NS_DTYPE) - return arr diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index aeb97a84e594af..dbad5a04161c98 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -138,9 +138,6 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, def _get_marker_compat(marker): import matplotlib.lines as mlines - import matplotlib as mpl - if mpl.__version__ < '1.1.0' and marker == '.': - return 'o' if marker not in mlines.lineMarkers: return 'o' return marker diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index 5f71d0148ee88f..227edf60951e68 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -37,7 +37,6 @@ def test_big_print(self): def test_empty_print(self): factor = Categorical([], ["a", "b", "c"]) expected = ("[], Categories (3, object): [a, b, c]") - # hack because array_repr changed in numpy > 1.6.x actual = repr(factor) assert actual == expected diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index fd3222cd1119bd..20ad39e137d466 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1272,10 +1272,7 @@ def test_nan_to_nat_conversions(): s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) assert (isna(s[8])) - # numpy < 1.7.0 is wrong - from distutils.version import LooseVersion - if LooseVersion(np.__version__) >= LooseVersion('1.7.0'): - assert (s[8].value == np.datetime64('NaT').astype(np.int64)) + assert (s[8].value == np.datetime64('NaT').astype(np.int64)) @td.skip_if_no_scipy diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c71d5d9f977f69..76e92042cbe6a4 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -15,7 +15,7 @@ from pandas.core.dtypes.common import is_integer_dtype from pandas.compat import (lmap, long, zip, range, lrange, lzip, - OrderedDict, is_platform_little_endian, PY36) + OrderedDict, is_platform_little_endian, PY3, PY36) from pandas import compat from pandas import (DataFrame, Index, Series, isna, MultiIndex, Timedelta, Timestamp, @@ -164,9 +164,9 @@ def test_constructor_dtype_str_na_values(self, string_dtype): def test_constructor_rec(self): rec = self.frame.to_records(index=False) - - # Assigning causes segfault in NumPy < 1.5.1 - # rec.dtype.names = list(rec.dtype.names)[::-1] + if PY3: + # unicode error under PY2 + rec.dtype.names = list(rec.dtype.names)[::-1] index = self.frame.index diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index bbe4914b5f4472..88c64bf9e9b978 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -190,7 +190,6 @@ def _check_unary_op(op): _check_bin_op(operator.or_) _check_bin_op(operator.xor) - # operator.neg is deprecated in numpy >= 1.9 _check_unary_op(operator.inv) # TODO: belongs elsewhere def test_logical_with_nas(self): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 9f5885fb80bba3..0c886b9fd3c4ba 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -663,12 +663,9 @@ def test_equals_op(self): tm.assert_series_equal(series_a == item, Series(expected3)) def test_numpy_ufuncs(self): - # test ufuncs of numpy 1.9.2. 
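# A minimal sketch of what test_numpy_ufuncs exercises, assuming a plain
# numeric Index: NumPy ufuncs apply element-wise, with no version-specific
# skips needed on supported NumPy releases.
import numpy as np
import pandas as pd

idx = pd.Index([1.0, 4.0, 9.0])
result = np.sqrt(idx)   # element-wise: 1.0, 2.0, 3.0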
see: + # test ufuncs of numpy, see: # http://docs.scipy.org/doc/numpy/reference/ufuncs.html - # some functions are skipped because it may return different result - # for unicode input depending on numpy version - for name, idx in compat.iteritems(self.indices): for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index 05adaada01ee50..3b40b2afe9c6d8 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -275,12 +275,9 @@ def test_map_dictlike(idx, mapper): np.rad2deg ]) def test_numpy_ufuncs(func): - # test ufuncs of numpy 1.9.2. see: + # test ufuncs of numpy. see: # http://docs.scipy.org/doc/numpy/reference/ufuncs.html - # some functions are skipped because it may return different result - # for unicode input depending on numpy version - # copy and paste from idx fixture as pytest doesn't support # parameters and fixtures at the same time. major_axis = Index(['foo', 'bar', 'baz', 'qux']) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 8bc8cb3fb15354..f012c9c255cd91 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -94,7 +94,6 @@ class TestChaining(object): def test_setitem_chained_setfault(self): # GH6026 - # setfaults under numpy 1.7.1 (ok on 1.8) data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout'] mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none'] diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 53d07aeef304aa..85b06001cf8a0d 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -337,7 +337,7 @@ def test_iloc_setitem_list(self): tm.assert_frame_equal(df, expected) def test_iloc_setitem_pandas_object(self): - # GH 17193, affecting old numpy (1.7 and 1.8) + # GH 17193 s_orig = Series([0, 1, 2, 3]) expected = Series([0, -1, -2, 3]) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 4a68719eedc9aa..84a0e3d867783e 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -199,8 +199,6 @@ def roundtrip(key, obj, **kwargs): def test_long_strings(self): # GH6166 - # unconversion of long strings was being chopped in earlier - # versions of numpy < 1.7.2 df = DataFrame({'a': tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10)) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 4865638671ea92..2e204f6d18d708 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -1075,7 +1075,6 @@ def test_irreg_dtypes(self): _, ax = self.plt.subplots() _check_plot_works(df.plot, ax=ax) - @pytest.mark.xfail(not PY3, reason="failing on mpl 1.4.3 on PY2") @pytest.mark.slow def test_time(self): t = datetime(1, 1, 1, 3, 30, 0) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 25dfbaba762c95..f5708b24d22b13 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -69,8 +69,7 @@ def test_plot(self): self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) df = DataFrame({'x': [1, 2], 'y': [3, 4]}) - # mpl >= 1.5.2 (or slightly below) throw AttributError - with 
pytest.raises((TypeError, AttributeError)): + with pytest.raises(AttributeError, match='Unknown property blarg'): df.plot.line(blarg=True) df = DataFrame(np.random.rand(10, 3), @@ -2967,13 +2966,9 @@ def test_passed_bar_colors(self): def test_rcParams_bar_colors(self): import matplotlib as mpl color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] - try: # mpl 1.5 - with mpl.rc_context( - rc={'axes.prop_cycle': mpl.cycler("color", color_tuples)}): - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") - except (AttributeError, KeyError): # mpl 1.4 - with mpl.rc_context(rc={'axes.color_cycle': color_tuples}): - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + with mpl.rc_context( + rc={'axes.prop_cycle': mpl.cycler("color", color_tuples)}): + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] @pytest.mark.parametrize('method', ['line', 'barh', 'bar']) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index dc708278836d2f..e6519c7db7a7bf 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -767,8 +767,8 @@ def test_errorbar_plot(self): s.plot(yerr=np.arange(11)) s_err = ['zzz'] * 10 - # in mpl 1.5+ this is a TypeError - with pytest.raises((ValueError, TypeError)): + # MPL > 2.0.0 will most likely use TypeError here + with pytest.raises((TypeError, ValueError)): s.plot(yerr=s_err) @td.xfail_if_mpl_2_2 diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 0ce9f885eb6389..89d76abaf5f827 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1015,12 +1015,6 @@ def test_clip_with_datetimes(self): def test_cummethods_bool(self): # GH 6270 - # looks like a buggy np.maximum.accumulate for numpy 1.6.1, py 3.2 - def cummin(x): - return np.minimum.accumulate(x) - - def cummax(x): - return np.maximum.accumulate(x) a = pd.Series([False, False, False, True, True, False, False]) b = ~a @@ -1028,8 +1022,8 @@ def cummax(x): d = ~c methods = {'cumsum': np.cumsum, 'cumprod': np.cumprod, - 'cummin': cummin, - 'cummax': cummax} + 'cummin': np.minimum.accumulate, + 'cummax': np.maximum.accumulate} args = product((a, b, c, d), methods) for s, method in args: expected = Series(methods[method](s.values)) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 084477d8202b10..c7efc1efaee8f5 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -292,11 +292,10 @@ def test_none_comparison(self): assert not result.iat[0] assert not result.iat[1] - # this fails for numpy < 1.9 - # and oddly for *some* platforms - # result = None != o # noqa - # assert result.iat[0] - # assert result.iat[1] + result = None != o # noqa + assert result.iat[0] + assert result.iat[1] + if (is_datetime64_dtype(o) or is_datetimetz(o)): # Following DatetimeIndex (and Timestamp) convention, # inequality comparisons with Series[datetime64] raise diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 49dbccb82fac85..e214d4c1985a97 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -464,7 +464,6 @@ def test_nankurt(self): allow_str=False, allow_date=False, allow_tdelta=False) - @td.skip_if_no("numpy", min_version="1.10.0") def test_nanprod(self): self.check_funs(nanops.nanprod, np.prod, allow_str=False, allow_date=False, allow_tdelta=False, diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 
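# Sketch of the accumulate idiom the cummethods test now uses directly:
# np.minimum.accumulate / np.maximum.accumulate compute running extrema,
# so no local wrapper functions are needed on modern NumPy.
import numpy as np

a = np.array([False, False, False, True, True, False, False])
np.maximum.accumulate(a)   # [F, F, F, T, T, T, T]
np.minimum.accumulate(a)   # [F, F, F, F, F, F, F]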
6d5d07b00398c0..c0c4e627b1b2e6 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -85,7 +85,6 @@ def test_sum(self): def test_mean(self): self._check_stat_op('mean', np.mean) - @td.skip_if_no("numpy", min_version="1.10.0") def test_prod(self): self._check_stat_op('prod', np.prod, skipna_alternative=np.nanprod) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 22e758a0e59a72..333b93dbdf580e 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -127,13 +127,6 @@ def test_nargsort(self): # np.argsort(items2) may not place NaNs first items2 = np.array(items, dtype='O') - try: - # GH 2785; due to a regression in NumPy1.6.2 - np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) - np.argsort(items2, kind='mergesort') - except TypeError: - pytest.skip('requested sort not available for type') - # mergesort is the most difficult to get right because we want it to be # stable. From 90961f2acad5c2db6aa957e2a79df8668f5b02d1 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 27 Nov 2018 03:51:35 +0100 Subject: [PATCH 09/78] DOC: Fix PEP-8 issues in 10min.rst (#23908) --- doc/source/10min.rst | 64 +++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/doc/source/10min.rst b/doc/source/10min.rst index b5938a24ce6c50..ddcb9b5fc26c75 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -5,19 +5,19 @@ .. ipython:: python :suppress: + import os import numpy as np + import pandas as pd - import os + np.random.seed(123456) np.set_printoptions(precision=4, suppress=True) - import matplotlib - # matplotlib.style.use('default') pd.options.display.max_rows = 15 - #### portions of this were borrowed from the - #### Pandas cheatsheet - #### created during the PyData Workshop-Sprint 2012 - #### Hannah Chen, Henry Chow, Eric Cox, Robert Mauriello + # portions of this were borrowed from the + # Pandas cheatsheet + # created during the PyData Workshop-Sprint 2012 + # Hannah Chen, Henry Chow, Eric Cox, Robert Mauriello ******************** @@ -31,9 +31,8 @@ Customarily, we import as follows: .. ipython:: python - import pandas as pd import numpy as np - import matplotlib.pyplot as plt + import pandas as pd Object Creation --------------- @@ -55,7 +54,7 @@ and labeled columns: dates = pd.date_range('20130101', periods=6) dates - df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD')) + df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD')) df Creating a ``DataFrame`` by passing a dict of objects that can be converted to series-like. @@ -64,7 +63,7 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s df2 = pd.DataFrame({'A': 1., 'B': pd.Timestamp('20130102'), - 'C': pd.Series(1, index=list(range(4)),dtype='float32'), + 'C': pd.Series(1, index=list(range(4)), dtype='float32'), 'D': np.array([3] * 4, dtype='int32'), 'E': pd.Categorical(["test", "train", "test", "train"]), 'F': 'foo'}) @@ -190,31 +189,31 @@ Selecting on a multi-axis by label: .. ipython:: python - df.loc[:,['A','B']] + df.loc[:, ['A', 'B']] Showing label slicing, both endpoints are *included*: .. ipython:: python - df.loc['20130102':'20130104',['A','B']] + df.loc['20130102':'20130104', ['A', 'B']] Reduction in the dimensions of the returned object: .. ipython:: python - df.loc['20130102',['A','B']] + df.loc['20130102', ['A', 'B']] For getting a scalar value: .. 
ipython:: python - df.loc[dates[0],'A'] + df.loc[dates[0], 'A'] For getting fast access to a scalar (equivalent to the prior method): .. ipython:: python - df.at[dates[0],'A'] + df.at[dates[0], 'A'] Selection by Position ~~~~~~~~~~~~~~~~~~~~~ @@ -231,37 +230,37 @@ By integer slices, acting similar to numpy/python: .. ipython:: python - df.iloc[3:5,0:2] + df.iloc[3:5, 0:2] By lists of integer position locations, similar to the numpy/python style: .. ipython:: python - df.iloc[[1,2,4],[0,2]] + df.iloc[[1, 2, 4], [0, 2]] For slicing rows explicitly: .. ipython:: python - df.iloc[1:3,:] + df.iloc[1:3, :] For slicing columns explicitly: .. ipython:: python - df.iloc[:,1:3] + df.iloc[:, 1:3] For getting a value explicitly: .. ipython:: python - df.iloc[1,1] + df.iloc[1, 1] For getting fast access to a scalar (equivalent to the prior method): .. ipython:: python - df.iat[1,1] + df.iat[1, 1] Boolean Indexing ~~~~~~~~~~~~~~~~ @@ -303,19 +302,19 @@ Setting values by label: .. ipython:: python - df.at[dates[0],'A'] = 0 + df.at[dates[0], 'A'] = 0 Setting values by position: .. ipython:: python - df.iat[0,1] = 0 + df.iat[0, 1] = 0 Setting by assigning with a NumPy array: .. ipython:: python - df.loc[:,'D'] = np.array([5] * len(df)) + df.loc[:, 'D'] = np.array([5] * len(df)) The result of the prior setting operations. @@ -345,7 +344,7 @@ returns a copy of the data. .. ipython:: python df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E']) - df1.loc[dates[0]:dates[1],'E'] = 1 + df1.loc[dates[0]:dates[1], 'E'] = 1 df1 To drop any rows that have missing data. @@ -653,7 +652,8 @@ pandas can include categorical data in a ``DataFrame``. For full docs, see the .. ipython:: python - df = pd.DataFrame({"id":[1, 2, 3, 4, 5, 6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']}) + df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], + "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) Convert the raw grades to a categorical data type. @@ -674,7 +674,8 @@ Reorder the categories and simultaneously add the missing categories (methods un .. ipython:: python - df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"]) + df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", + "good", "very good"]) df["grade"] Sorting is per order in the categories, not lexical order. @@ -703,7 +704,8 @@ See the :ref:`Plotting ` docs. .. 
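# A condensed, plain-Python sketch of the categorical workflow shown in
# 10min.rst above (three rows instead of six; otherwise the same calls).
import pandas as pd

df = pd.DataFrame({"id": [1, 2, 3], "raw_grade": ['a', 'b', 'b']})
df["grade"] = df["raw_grade"].astype("category")
df["grade"].cat.categories = ["very good", "good"]
df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium",
                                              "good", "very good"])
df.sort_values(by="grade")   # categorical order, not lexical order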
ipython:: python - ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000)) + ts = pd.Series(np.random.randn(1000), + index=pd.date_range('1/1/2000', periods=1000)) ts = ts.cumsum() @savefig series_plot_basic.png @@ -718,8 +720,10 @@ of the columns with labels: columns=['A', 'B', 'C', 'D']) df = df.cumsum() + plt.figure() + df.plot() @savefig frame_plot_basic.png - plt.figure(); df.plot(); plt.legend(loc='best') + plt.legend(loc='best') Getting Data In/Out ------------------- From 448c9bc32ef49a40bef5f4c0f7d0fc055cd0d4a7 Mon Sep 17 00:00:00 2001 From: Varad Gunjal Date: Tue, 27 Nov 2018 15:46:58 +0530 Subject: [PATCH 10/78] DOC: Capitalize docstring summaries (#23886) --- pandas/core/arrays/timedeltas.py | 8 +- pandas/core/frame.py | 130 ++++++----- pandas/core/groupby/base.py | 11 +- pandas/core/groupby/groupby.py | 130 ++++++----- pandas/core/indexes/base.py | 332 ++++++++++++++++++---------- pandas/core/indexes/datetimelike.py | 45 ++-- pandas/core/indexes/multi.py | 6 +- pandas/core/indexes/range.py | 2 +- pandas/core/panel.py | 66 +++--- pandas/core/resample.py | 139 +++++++----- pandas/core/series.py | 154 +++++++------ pandas/core/window.py | 103 +++++---- pandas/io/formats/style.py | 58 +++-- pandas/io/json/json.py | 60 +++-- pandas/io/json/normalize.py | 11 +- pandas/io/pytables.py | 4 +- pandas/tseries/offsets.py | 146 ++++++++---- 17 files changed, 873 insertions(+), 532 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index d1e6d979b554c4..da26966cfa94c3 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -241,7 +241,7 @@ def _add_offset(self, other): def _add_delta(self, delta): """ Add a timedelta-like, Tick, or TimedeltaIndex-like object - to self, yielding a new TimedeltaArray + to self, yielding a new TimedeltaArray. Parameters ---------- @@ -256,7 +256,9 @@ def _add_delta(self, delta): return type(self)(new_values, freq='infer') def _add_datetime_arraylike(self, other): - """Add DatetimeArray/Index or ndarray[datetime64] to TimedeltaArray""" + """ + Add DatetimeArray/Index or ndarray[datetime64] to TimedeltaArray. + """ if isinstance(other, np.ndarray): # At this point we have already checked that dtype is datetime64 from pandas.core.arrays import DatetimeArrayMixin @@ -404,7 +406,7 @@ def total_seconds(self): def to_pytimedelta(self): """ Return Timedelta Array/Index as object ndarray of datetime.timedelta - objects + objects. Returns ------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 592825a7ea63b7..9481689d4099ac 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -285,7 +285,8 @@ class DataFrame(NDFrame): - """ Two-dimensional size-mutable, potentially heterogeneous tabular data + """ + Two-dimensional size-mutable, potentially heterogeneous tabular data structure with labeled axes (rows and columns). Arithmetic operations align on both row and column labels. Can be thought of as a dict-like container for Series objects. The primary pandas data structure. @@ -651,10 +652,11 @@ def _repr_fits_vertical_(self): def _repr_fits_horizontal_(self, ignore_width=False): """ Check if full repr fits in horizontal boundaries imposed by the display - options width and max_columns. In case off non-interactive session, no - boundaries apply. + options width and max_columns. + + In case off non-interactive session, no boundaries apply. 
- ignore_width is here so ipnb+HTML output can behave the way + `ignore_width` is here so ipnb+HTML output can behave the way users expect. display.max_columns remains in effect. GH3541, GH3573 """ @@ -702,14 +704,16 @@ def _repr_fits_horizontal_(self, ignore_width=False): return repr_width < width def _info_repr(self): - """True if the repr should show the info view.""" + """ + True if the repr should show the info view. + """ info_repr_option = (get_option("display.large_repr") == "info") return info_repr_option and not (self._repr_fits_horizontal_() and self._repr_fits_vertical_()) def __unicode__(self): """ - Return a string representation for a particular DataFrame + Return a string representation for a particular DataFrame. Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. @@ -734,6 +738,7 @@ def __unicode__(self): def _repr_html_(self): """ Return a html representation for a particular DataFrame. + Mainly for IPython notebook. """ # qtconsole doesn't report its line width, and also @@ -974,12 +979,14 @@ def itertuples(self, index=True, name="Pandas"): items = iteritems def __len__(self): - """Returns length of info axis, but here we use the index """ + """ + Returns length of info axis, but here we use the index. + """ return len(self.index) def dot(self, other): """ - Matrix multiplication with DataFrame or Series objects. Can also be + Matrix multiplication with DataFrame or Series objects. Can also be called using `self @ other` in Python >= 3.5. Parameters @@ -1024,11 +1031,15 @@ def dot(self, other): raise TypeError('unsupported type: {oth}'.format(oth=type(other))) def __matmul__(self, other): - """ Matrix multiplication using binary `@` operator in Python>=3.5 """ + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ return self.dot(other) def __rmatmul__(self, other): - """ Matrix multiplication using binary `@` operator in Python>=3.5 """ + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ return self.T.dot(np.transpose(other)).T # ---------------------------------------------------------------------- @@ -1354,7 +1365,7 @@ def to_gbq(self, destination_table, project_id=None, chunksize=None, def from_records(cls, data, index=None, exclude=None, columns=None, coerce_float=False, nrows=None): """ - Convert structured or record ndarray to DataFrame + Convert structured or record ndarray to DataFrame. Parameters ---------- @@ -1579,7 +1590,8 @@ def to_records(self, index=True, convert_datetime64=None): @classmethod def from_items(cls, items, columns=None, orient='columns'): - """Construct a dataframe from a list of tuples + """ + Construct a DataFrame from a list of tuples. .. deprecated:: 0.23.0 `from_items` is deprecated and will be removed in a future version. @@ -1673,7 +1685,8 @@ def _from_arrays(cls, arrays, columns, index, dtype=None): def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, encoding=None, tupleize_cols=None, infer_datetime_format=False): - """Read CSV file. + """ + Read CSV file. .. deprecated:: 0.21.0 Use :func:`pandas.read_csv` instead. @@ -1953,7 +1966,7 @@ def to_stata(self, fname, convert_dates=None, write_index=True, def to_feather(self, fname): """ - write out the binary feather-format for DataFrames + Write out the binary feather-format for DataFrames. .. 
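# A small sketch of the matrix multiplication described for DataFrame.dot
# above; on Python >= 3.5 the `@` operator dispatches to the same method.
import pandas as pd

a = pd.DataFrame([[1, 2], [3, 4]], columns=['x', 'y'])
b = pd.DataFrame([[5, 6], [7, 8]], index=['x', 'y'])
result = a.dot(b)   # aligns a's columns with b's index; same as a @ b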
versionadded:: 0.20.0 @@ -2610,7 +2623,8 @@ def _unpickle_matrix_compat(self, state): # pragma: no cover # Getting and setting elements def get_value(self, index, col, takeable=False): - """Quickly retrieve single value at passed column and index + """ + Quickly retrieve single value at passed column and index. .. deprecated:: 0.21.0 Use .at[] or .iat[] accessors instead. @@ -2653,7 +2667,8 @@ def _get_value(self, index, col, takeable=False): _get_value.__doc__ = get_value.__doc__ def set_value(self, index, col, value, takeable=False): - """Put single value at passed column and index + """ + Put single value at passed column and index. .. deprecated:: 0.21.0 Use .at[] or .iat[] accessors instead. @@ -2698,18 +2713,17 @@ def _set_value(self, index, col, value, takeable=False): def _ixs(self, i, axis=0): """ + Parameters + ---------- i : int, slice, or sequence of integers axis : int - """ + Notes + ----- + If slice passed, the resulting data will be a view. + """ # irow if axis == 0: - """ - Notes - ----- - If slice passed, the resulting data will be a view - """ - if isinstance(i, slice): return self[i] else: @@ -2735,12 +2749,6 @@ def _ixs(self, i, axis=0): # icol else: - """ - Notes - ----- - If slice passed, the resulting data will be a view - """ - label = self.columns[i] if isinstance(i, slice): # need to return view @@ -2887,7 +2895,8 @@ def _getitem_frame(self, key): return self.where(key) def query(self, expr, inplace=False, **kwargs): - """Query the columns of a frame with a boolean expression. + """ + Query the columns of a DataFrame with a boolean expression. Parameters ---------- @@ -3223,7 +3232,9 @@ def _box_item_values(self, key, values): return self._box_col_values(values, items) def _box_col_values(self, values, items): - """ provide boxed values for a column """ + """ + Provide boxed values for a column. + """ klass = self._constructor_sliced return klass(values, index=self.index, name=items, fastpath=True) @@ -3289,8 +3300,8 @@ def _setitem_frame(self, key, value): def _ensure_valid_index(self, value): """ - ensure that if we don't have an index, that we can create one from the - passed value + Ensure that if we don't have an index, that we can create one from the + passed value. """ # GH5632, make sure that we are a Series convertible if not len(self.index) and is_list_like(value): @@ -3552,7 +3563,9 @@ def _series(self): return result def lookup(self, row_labels, col_labels): - """Label-based "fancy indexing" function for DataFrame. + """ + Label-based "fancy indexing" function for DataFrame. + Given equal-length arrays of row and column labels, return an array of the values corresponding to each (row, col) pair. @@ -3639,7 +3652,9 @@ def _reindex_columns(self, new_columns, method, copy, level, allow_dups=False) def _reindex_multi(self, axes, copy, fill_value): - """ we are guaranteed non-Nones in the axes! """ + """ + We are guaranteed non-Nones in the axes. + """ new_index, row_indexer = self.index.reindex(axes['index']) new_columns, col_indexer = self.columns.reindex(axes['columns']) @@ -3819,7 +3834,8 @@ def drop(self, labels=None, axis=0, index=None, columns=None, ('inplace', False), ('level', None)]) def rename(self, *args, **kwargs): - """Alter axes labels. + """ + Alter axes labels. Function / dict values must be unique (1-to-1). Labels not contained in a dict / Series will be left as-is. 
Extra labels listed don't throw an @@ -4473,7 +4489,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, def drop_duplicates(self, subset=None, keep='first', inplace=False): """ Return DataFrame with duplicate rows removed, optionally only - considering certain columns + considering certain columns. Parameters ---------- @@ -4507,7 +4523,7 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False): def duplicated(self, subset=None, keep='first'): """ Return boolean Series denoting duplicate rows, optionally only - considering certain columns + considering certain columns. Parameters ---------- @@ -4884,7 +4900,7 @@ def nsmallest(self, n, columns, keep='first'): def swaplevel(self, i=-2, j=-1, axis=0): """ - Swap levels i and j in a MultiIndex on a particular axis + Swap levels i and j in a MultiIndex on a particular axis. Parameters ---------- @@ -4911,8 +4927,8 @@ def swaplevel(self, i=-2, j=-1, axis=0): def reorder_levels(self, order, axis=0): """ - Rearrange index levels using input order. - May not drop or duplicate levels + Rearrange index levels using input order. May not drop or + duplicate levels. Parameters ---------- @@ -5501,7 +5517,7 @@ def pivot(self, index=None, columns=None, values=None): _shared_docs['pivot_table'] = """ Create a spreadsheet-style pivot table as a DataFrame. The levels in the pivot table will be stored in MultiIndex objects (hierarchical - indexes) on the index and columns of the result DataFrame + indexes) on the index and columns of the result DataFrame. Parameters ----------%s @@ -5803,9 +5819,11 @@ def unstack(self, level=-1, fill_value=None): """ Pivot a level of the (necessarily hierarchical) index labels, returning a DataFrame having a new level of column labels whose inner-most level - consists of the pivoted index labels. If the index is not a MultiIndex, - the output will be a Series (the analogue of stack when the columns are - not a MultiIndex). + consists of the pivoted index labels. + + If the index is not a MultiIndex, the output will be a Series + (the analogue of stack when the columns are not a MultiIndex). + The level involved will automatically get sorted. Parameters @@ -5861,7 +5879,7 @@ def unstack(self, level=-1, fill_value=None): return unstack(self, level, fill_value) _shared_docs['melt'] = (""" - "Unpivots" a DataFrame from wide format to long format, optionally + Unpivots a DataFrame from wide format to long format, optionally leaving identifier variables set. This function is useful to massage a DataFrame into a format where one @@ -6067,8 +6085,7 @@ def _gotitem(self, ): # type: (...) -> Union[Series, DataFrame] """ - sub-classes to define - return a sliced object + Sub-classes to define. Return a sliced object. Parameters ---------- @@ -6819,7 +6836,7 @@ def _series_round(s, decimals): def corr(self, method='pearson', min_periods=1): """ - Compute pairwise correlation of columns, excluding NA/null values + Compute pairwise correlation of columns, excluding NA/null values. Parameters ---------- @@ -7414,7 +7431,9 @@ def idxmax(self, axis=0, skipna=True): return Series(result, index=self._get_agg_axis(axis)) def _get_agg_axis(self, axis_num): - """ let's be explicit about this """ + """ + Let's be explicit about this. 
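# Sketch of the wide-to-long reshape described in the melt docstring
# above; the column names here are illustrative only.
import pandas as pd

wide = pd.DataFrame({'id': [1, 2], 'height': [1.7, 1.8], 'weight': [60, 75]})
melted = pd.melt(wide, id_vars=['id'], value_vars=['height', 'weight'],
                 var_name='measure', value_name='reading')
# one row per (id, measure) pair instead of one column per measure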
+ """ if axis_num == 0: return self.columns elif axis_num == 1: @@ -7604,7 +7623,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, def to_timestamp(self, freq=None, how='start', axis=0, copy=True): """ - Cast to DatetimeIndex of timestamps, at *beginning* of period + Cast to DatetimeIndex of timestamps, at *beginning* of period. Parameters ---------- @@ -7640,7 +7659,7 @@ def to_timestamp(self, freq=None, how='start', axis=0, copy=True): def to_period(self, freq=None, axis=0, copy=True): """ Convert DataFrame from DatetimeIndex to PeriodIndex with desired - frequency (inferred from index if not passed) + frequency (inferred from index if not passed). Parameters ---------- @@ -7780,6 +7799,7 @@ def isin(self, values): def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): """ Segregate Series based on type and coerce into matrices. + Needs to handle a lot of exceptional cases. """ # figure out the index, if necessary @@ -7888,7 +7908,7 @@ def convert(v): def _to_arrays(data, columns, coerce_float=False, dtype=None): """ - Return list of arrays, columns + Return list of arrays, columns. """ if isinstance(data, DataFrame): if columns is not None: @@ -7934,7 +7954,9 @@ def _to_arrays(data, columns, coerce_float=False, dtype=None): def _masked_rec_array_to_mgr(data, index, columns, dtype, copy): - """ extract from a masked rec array and create the manager """ + """ + Extract from a masked rec array and create the manager. + """ # essentially process a record array then fill it fill_value = data.fill_value diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 1bf97690a84ed7..a148f7e0cab87b 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -12,11 +12,15 @@ class GroupByMixin(object): - """ provide the groupby facilities to the mixed object """ + """ + Provide the groupby facilities to the mixed object. + """ @staticmethod def _dispatch(name, *args, **kwargs): - """ dispatch to apply """ + """ + Dispatch to apply. + """ def outer(self, *args, **kwargs): def f(x): @@ -28,8 +32,7 @@ def f(x): def _gotitem(self, key, ndim, subset=None): """ - sub-classes to define - return a sliced object + Sub-classes to define. Return a sliced object. Parameters ---------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b68fdf853ab192..292d4207cf2c5c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -291,7 +291,7 @@ class providing the base-class of operations. class GroupByPlot(PandasObject): """ - Class implementing the .plot attribute for groupby objects + Class implementing the .plot attribute for groupby objects. """ def __init__(self, groupby): @@ -314,7 +314,7 @@ def f(self): @contextmanager def _group_selection_context(groupby): """ - set / reset the _group_selection_context + Set / reset the _group_selection_context. """ groupby._set_group_selection() yield groupby @@ -377,14 +377,16 @@ def __unicode__(self): def _assure_grouper(self): """ - we create the grouper on instantiation - sub-classes may have a different policy + We create the grouper on instantiation sub-classes may have a + different policy. """ pass @property def groups(self): - """ dict {group name -> group labels} """ + """ + Dict {group name -> group labels}. + """ self._assure_grouper() return self.grouper.groups @@ -395,14 +397,16 @@ def ngroups(self): @property def indices(self): - """ dict {group name -> group indices} """ + """ + Dict {group name -> group indices}. 
+ """ self._assure_grouper() return self.grouper.indices def _get_indices(self, names): """ - safe get multiple indices, translate keys for - datelike to underlying repr + Safe get multiple indices, translate keys for + datelike to underlying repr. """ def get_converter(s): @@ -450,7 +454,9 @@ def get_converter(s): return [self.indices.get(name, []) for name in names] def _get_index(self, name): - """ safe get index, translate keys for datelike to underlying repr """ + """ + Safe get index, translate keys for datelike to underlying repr. + """ return self._get_indices([name])[0] @cache_readonly @@ -465,8 +471,10 @@ def _selected_obj(self): def _reset_group_selection(self): """ - Clear group based selection. Used for methods needing to return info on - each group regardless of whether a group selection was previously set. + Clear group based selection. + + Used for methods needing to return info on each group regardless of + whether a group selection was previously set. """ if self._group_selection is not None: # GH12839 clear cached selection too when changing group selection @@ -475,8 +483,9 @@ def _reset_group_selection(self): def _set_group_selection(self): """ - Create group based selection. Used when selection is not passed - directly but instead via a grouper. + Create group based selection. + + Used when selection is not passed directly but instead via a grouper. NOTE: this should be paired with a call to _reset_group_selection """ @@ -617,7 +626,7 @@ def curried(x): def get_group(self, name, obj=None): """ - Constructs NDFrame from group with provided name + Constructs NDFrame from group with provided name. Parameters ---------- @@ -643,7 +652,7 @@ def get_group(self, name, obj=None): def __iter__(self): """ - Groupby iterator + Groupby iterator. Returns ------- @@ -743,11 +752,11 @@ def _cumcount_array(self, ascending=True): def _try_cast(self, result, obj, numeric_only=False): """ - try to cast the result to our obj original type, - we may have roundtripped thru object in the mean-time + Try to cast the result to our obj original type, + we may have roundtripped through object in the mean-time. - if numeric_only is True, then only try to cast numerics - and not datetimelikes + If numeric_only is True, then only try to cast numerics + and not datetimelikes. """ if obj.ndim > 1: @@ -945,8 +954,9 @@ def _apply_filter(self, indices, dropna): class GroupBy(_GroupBy): """ - Class for grouping and aggregating relational data. See aggregate, - transform, and apply functions on this object. + Class for grouping and aggregating relational data. + + See aggregate, transform, and apply functions on this object. It's easiest to use obj.groupby(...) to use GroupBy, but you can also do: @@ -1010,7 +1020,9 @@ class GroupBy(_GroupBy): Number of groups """ def _bool_agg(self, val_test, skipna): - """Shared func to call any / all Cython GroupBy implementations""" + """ + Shared func to call any / all Cython GroupBy implementations. + """ def objs_to_bool(vals): try: @@ -1036,7 +1048,7 @@ def result_to_bool(result): @Appender(_doc_template) def any(self, skipna=True): """ - Returns True if any value in the group is truthful, else False + Returns True if any value in the group is truthful, else False. Parameters ---------- @@ -1049,7 +1061,7 @@ def any(self, skipna=True): @Appender(_doc_template) def all(self, skipna=True): """ - Returns True if all values in the group are truthful, else False + Returns True if all values in the group are truthful, else False. 
Parameters ---------- @@ -1061,7 +1073,9 @@ def all(self, skipna=True): @Substitution(name='groupby') @Appender(_doc_template) def count(self): - """Compute count of group, excluding missing values""" + """ + Compute count of group, excluding missing values. + """ # defined here for API doc raise NotImplementedError @@ -1127,7 +1141,7 @@ def mean(self, *args, **kwargs): @Appender(_doc_template) def median(self, **kwargs): """ - Compute median of groups, excluding missing values + Compute median of groups, excluding missing values. For multiple groupings, the result index will be a MultiIndex """ @@ -1148,9 +1162,9 @@ def f(x): @Appender(_doc_template) def std(self, ddof=1, *args, **kwargs): """ - Compute standard deviation of groups, excluding missing values + Compute standard deviation of groups, excluding missing values. - For multiple groupings, the result index will be a MultiIndex + For multiple groupings, the result index will be a MultiIndex. Parameters ---------- @@ -1166,9 +1180,9 @@ def std(self, ddof=1, *args, **kwargs): @Appender(_doc_template) def var(self, ddof=1, *args, **kwargs): """ - Compute variance of groups, excluding missing values + Compute variance of groups, excluding missing values. - For multiple groupings, the result index will be a MultiIndex + For multiple groupings, the result index will be a MultiIndex. Parameters ---------- @@ -1192,9 +1206,9 @@ def var(self, ddof=1, *args, **kwargs): @Appender(_doc_template) def sem(self, ddof=1): """ - Compute standard error of the mean of groups, excluding missing values + Compute standard error of the mean of groups, excluding missing values. - For multiple groupings, the result index will be a MultiIndex + For multiple groupings, the result index will be a MultiIndex. Parameters ---------- @@ -1207,7 +1221,9 @@ def sem(self, ddof=1): @Substitution(name='groupby') @Appender(_doc_template) def size(self): - """Compute group sizes""" + """ + Compute group sizes. + """ result = self.grouper.size() if isinstance(self.obj, Series): @@ -1216,7 +1232,9 @@ def size(self): @classmethod def _add_numeric_operations(cls): - """ add numeric operations to the GroupBy generically """ + """ + Add numeric operations to the GroupBy generically. + """ def groupby_function(name, alias, npfunc, numeric_only=True, _convert=False, @@ -1293,7 +1311,8 @@ def last(x): @Appender(_doc_template) def ohlc(self): """ - Compute sum of values, excluding missing values + Compute sum of values, excluding missing values. + For multiple groupings, the result index will be a MultiIndex """ @@ -1421,9 +1440,7 @@ def resample(self, rule, *args, **kwargs): @Appender(_doc_template) def rolling(self, *args, **kwargs): """ - Return a rolling grouper, providing rolling - functionality per group - + Return a rolling grouper, providing rolling functionality per group. """ from pandas.core.window import RollingGroupby return RollingGroupby(self, *args, **kwargs) @@ -1433,14 +1450,14 @@ def rolling(self, *args, **kwargs): def expanding(self, *args, **kwargs): """ Return an expanding grouper, providing expanding - functionality per group - + functionality per group. """ from pandas.core.window import ExpandingGroupby return ExpandingGroupby(self, *args, **kwargs) def _fill(self, direction, limit=None): - """Shared function for `pad` and `backfill` to call Cython method + """ + Shared function for `pad` and `backfill` to call Cython method. 
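# Usage sketch for the pad/backfill wrappers built on _fill, as described
# above: missing values are filled within each group, never across groups.
import numpy as np
import pandas as pd

df = pd.DataFrame({'key': ['a', 'a', 'b', 'b'],
                   'val': [1.0, np.nan, np.nan, 4.0]})
df.groupby('key')['val'].pad()        # [1.0, 1.0, NaN, 4.0]
df.groupby('key')['val'].backfill()   # [1.0, NaN, 4.0, 4.0]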
Parameters ---------- @@ -1474,7 +1491,7 @@ def _fill(self, direction, limit=None): @Substitution(name='groupby') def pad(self, limit=None): """ - Forward fill the values + Forward fill the values. Parameters ---------- @@ -1494,7 +1511,7 @@ def pad(self, limit=None): @Substitution(name='groupby') def backfill(self, limit=None): """ - Backward fill the values + Backward fill the values. Parameters ---------- @@ -1830,7 +1847,9 @@ def rank(self, method='average', ascending=True, na_option='keep', @Substitution(name='groupby') @Appender(_doc_template) def cumprod(self, axis=0, *args, **kwargs): - """Cumulative product for each group""" + """ + Cumulative product for each group. + """ nv.validate_groupby_func('cumprod', args, kwargs, ['numeric_only', 'skipna']) if axis != 0: @@ -1841,7 +1860,9 @@ def cumprod(self, axis=0, *args, **kwargs): @Substitution(name='groupby') @Appender(_doc_template) def cumsum(self, axis=0, *args, **kwargs): - """Cumulative sum for each group""" + """ + Cumulative sum for each group. + """ nv.validate_groupby_func('cumsum', args, kwargs, ['numeric_only', 'skipna']) if axis != 0: @@ -1852,7 +1873,9 @@ def cumsum(self, axis=0, *args, **kwargs): @Substitution(name='groupby') @Appender(_doc_template) def cummin(self, axis=0, **kwargs): - """Cumulative min for each group""" + """ + Cumulative min for each group. + """ if axis != 0: return self.apply(lambda x: np.minimum.accumulate(x, axis)) @@ -1861,7 +1884,9 @@ def cummin(self, axis=0, **kwargs): @Substitution(name='groupby') @Appender(_doc_template) def cummax(self, axis=0, **kwargs): - """Cumulative max for each group""" + """ + Cumulative max for each group. + """ if axis != 0: return self.apply(lambda x: np.maximum.accumulate(x, axis)) @@ -1873,7 +1898,8 @@ def _get_cythonized_result(self, how, grouper, aggregate=False, result_is_index=False, pre_processing=None, post_processing=None, **kwargs): - """Get result for Cythonized functions + """ + Get result for Cythonized functions. Parameters ---------- @@ -1968,7 +1994,7 @@ def _get_cythonized_result(self, how, grouper, aggregate=False, @Appender(_doc_template) def shift(self, periods=1, freq=None, axis=0): """ - Shift each group by periods observations + Shift each group by periods observations. Parameters ---------- @@ -1991,7 +2017,9 @@ def shift(self, periods=1, freq=None, axis=0): @Appender(_doc_template) def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, axis=0): - """Calculate pct_change of each value to previous entry in group""" + """ + Calculate pct_change of each value to previous entry in group. + """ if freq is not None or axis != 0: return self.apply(lambda x: x.pct_change(periods=periods, fill_method=fill_method, @@ -2035,7 +2063,7 @@ def head(self, n=5): @Appender(_doc_template) def tail(self, n=5): """ - Returns last n rows of each group + Returns last n rows of each group. Essentially equivalent to ``.apply(lambda x: x.tail(n))``, except ignores as_index flag. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 28aefb652adb0c..22c348acaf3413 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -150,8 +150,9 @@ class InvalidIndexError(Exception): def _new_Index(cls, d): - """ This is called upon unpickling, rather than the default which doesn't - have arguments and breaks __new__ + """ + This is called upon unpickling, rather than the default which doesn't + have arguments and breaks __new__. 
""" # required for backward compat, because PI can't be instantiated with # ordinals through __new__ GH #13277 @@ -164,7 +165,7 @@ def _new_Index(cls, d): class Index(IndexOpsMixin, PandasObject): """ Immutable ndarray implementing an ordered, sliceable set. The basic object - storing axis labels for all pandas objects + storing axis labels for all pandas objects. Parameters ---------- @@ -490,8 +491,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, @classmethod def _simple_new(cls, values, name=None, dtype=None, **kwargs): """ - we require the we have a dtype compat for the values - if we are passed a non-dtype compat, then coerce using the constructor + We require that we have a dtype compat for the values. If we are passed + a non-dtype compat, then coerce using the constructor. Must be careful not to recurse. """ @@ -518,9 +519,9 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs): return result._reset_identity() _index_shared_docs['_shallow_copy'] = """ - create a new Index with the same class as the caller, don't copy the + Create a new Index with the same class as the caller, don't copy the data, use the same object attributes with passed in attributes taking - precedence + precedence. *this is an internal non-public method* @@ -550,9 +551,9 @@ def _shallow_copy(self, values=None, **kwargs): def _shallow_copy_with_infer(self, values, **kwargs): """ - create a new Index inferring the class with passed value, don't copy + Create a new Index inferring the class with passed value, don't copy the data, use the same object attributes with passed in attributes - taking precedence + taking precedence. *this is an internal non-public method* @@ -575,11 +576,11 @@ def _shallow_copy_with_infer(self, values, **kwargs): def _deepcopy_if_needed(self, orig, copy=False): """ - .. versionadded:: 0.19.0 - Make a copy of self if data coincides (in memory) with orig. Subclasses should override this if self._base is not an ndarray. + .. versionadded:: 0.19.0 + Parameters ---------- orig : ndarray @@ -608,7 +609,9 @@ def _update_inplace(self, result, **kwargs): raise TypeError("Index can't be updated inplace") def _sort_levels_monotonic(self): - """ compat with MultiIndex """ + """ + Compat with MultiIndex. + """ return self _index_shared_docs['_get_grouper_for_level'] = """ @@ -643,7 +646,7 @@ def _get_grouper_for_level(self, mapper, level=None): def is_(self, other): """ - More flexible, faster check like ``is`` but that works through views + More flexible, faster check like ``is`` but that works through views. Note: this is *not* the same as ``Index.identical()``, which checks that metadata is also the same. @@ -662,24 +665,28 @@ def is_(self, other): other, '_id', Ellipsis) and self._id is not None def _reset_identity(self): - """Initializes or resets ``_id`` attribute with new object""" + """ + Initializes or resets ``_id`` attribute with new object. + """ self._id = _Identity() return self # ndarray compat def __len__(self): """ - return the length of the Index + Return the length of the Index. """ return len(self._data) def __array__(self, dtype=None): - """ the array interface, return my values """ + """ + The array interface, return my values. + """ return self._data.view(np.ndarray) def __array_wrap__(self, result, context=None): """ - Gets called after a ufunc + Gets called after a ufunc. 
""" if is_bool_dtype(result): return result @@ -690,24 +697,31 @@ def __array_wrap__(self, result, context=None): @cache_readonly def dtype(self): - """ return the dtype object of the underlying data """ + """ + Return the dtype object of the underlying data. + """ return self._data.dtype @cache_readonly def dtype_str(self): - """ return the dtype str of the underlying data """ + """ + Return the dtype str of the underlying data. + """ return str(self.dtype) @property def values(self): - """ return the underlying data as an ndarray """ + """ + Return the underlying data as an ndarray. + """ return self._data.view(np.ndarray) @property def _values(self): # type: () -> Union[ExtensionArray, Index, np.ndarray] # TODO(EA): remove index types as they become extension arrays - """The best array representation. + """ + The best array representation. This is an ndarray, ExtensionArray, or Index subclass. This differs from ``_ndarray_values``, which always returns an ndarray. @@ -825,12 +839,12 @@ def repeat(self, repeats, *args, **kwargs): return self._shallow_copy(self._values.repeat(repeats)) _index_shared_docs['where'] = """ - .. versionadded:: 0.19.0 - Return an Index of same shape as self and whose corresponding entries are from self where cond is True and otherwise are from other. + .. versionadded:: 0.19.0 + Parameters ---------- cond : boolean array-like with the same length as self @@ -862,7 +876,7 @@ def where(self, cond, other=None): def ravel(self, order='C'): """ - return an ndarray of the flattened values of the underlying data + Return an ndarray of the flattened values of the underlying data. See Also -------- @@ -927,8 +941,16 @@ def _string_data_error(cls, data): @classmethod def _coerce_to_ndarray(cls, data): - """coerces data to ndarray, raises on scalar data. Converts other - iterables to list first and then to array. Does not touch ndarrays. + """ + Coerces data to ndarray. + + Converts other iterables to list first and then to array. + Does not touch ndarrays. + + Raises + ------ + TypeError + When the data passed in is a scalar. """ if not isinstance(data, (np.ndarray, Index)): @@ -942,7 +964,9 @@ def _coerce_to_ndarray(cls, data): return data def _get_attributes_dict(self): - """ return an attributes dict for my class """ + """ + Return an attributes dict for my class. + """ return {k: getattr(self, k, None) for k in self._attributes} def view(self, cls=None): @@ -959,7 +983,7 @@ def view(self, cls=None): def _coerce_scalar_to_index(self, item): """ - we need to coerce a scalar to a compat for our index type + We need to coerce a scalar to a compat for our index type. Parameters ---------- @@ -1078,13 +1102,13 @@ def _format_space(self): @property def _formatter_func(self): """ - Return the formatter function + Return the formatter function. """ return default_pprint def _format_data(self, name=None): """ - Return the formatted data as a unicode string + Return the formatted data as a unicode string. """ # do we want to justify (only do so for non-objects) @@ -1097,7 +1121,7 @@ def _format_data(self, name=None): def _format_attrs(self): """ - Return a list of tuples of the (attr,formatted_value) + Return a list of tuples of the (attr,formatted_value). """ return format_object_attrs(self) @@ -1124,7 +1148,7 @@ def to_flat_index(self): def to_series(self, index=None, name=None): """ Create a Series with both index and values equal to the index keys - useful with map for returning an indexer based on an index + useful with map for returning an indexer based on an index. 
Parameters ---------- @@ -1251,7 +1275,9 @@ def astype(self, dtype, copy=True): raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) def _to_safe_for_reshape(self): - """ convert to object if we are a categorical """ + """ + Convert to object if we are a categorical. + """ return self def _assert_can_do_setop(self, other): @@ -1268,11 +1294,15 @@ def _convert_can_do_setop(self, other): return other, result_name def _convert_for_op(self, value): - """ Convert value to be insertable to ndarray """ + """ + Convert value to be insertable to ndarray. + """ return value def _assert_can_do_op(self, value): - """ Check value is valid for scalar op """ + """ + Check value is valid for scalar op. + """ if not is_scalar(value): msg = "'value' must be a scalar, passed: {0}" raise TypeError(msg.format(type(value).__name__)) @@ -1445,7 +1475,7 @@ def _has_complex_internals(self): def _summary(self, name=None): """ - Return a summarized representation + Return a summarized representation. Parameters ---------- @@ -1476,7 +1506,8 @@ def _summary(self, name=None): def summary(self, name=None): """ - Return a summarized representation + Return a summarized representation. + .. deprecated:: 0.23.0 """ warnings.warn("'summary' is deprecated and will be removed in a " @@ -1493,13 +1524,15 @@ def _mpl_repr(self): # introspection @property def is_monotonic(self): - """ alias for is_monotonic_increasing (deprecated) """ + """ + Alias for is_monotonic_increasing. + """ return self.is_monotonic_increasing @property def is_monotonic_increasing(self): """ - return if the index is monotonic increasing (only equal or + Return if the index is monotonic increasing (only equal or increasing) values. Examples @@ -1516,7 +1549,7 @@ def is_monotonic_increasing(self): @property def is_monotonic_decreasing(self): """ - return if the index is monotonic decreasing (only equal or + Return if the index is monotonic decreasing (only equal or decreasing) values. Examples @@ -1532,8 +1565,9 @@ def is_monotonic_decreasing(self): @property def _is_strictly_monotonic_increasing(self): - """return if the index is strictly monotonic increasing - (only increasing) values + """ + Return if the index is strictly monotonic increasing + (only increasing) values. Examples -------- @@ -1548,8 +1582,9 @@ def _is_strictly_monotonic_increasing(self): @property def _is_strictly_monotonic_decreasing(self): - """return if the index is strictly monotonic decreasing - (only decreasing) values + """ + Return if the index is strictly monotonic decreasing + (only decreasing) values. Examples -------- @@ -1567,7 +1602,9 @@ def is_lexsorted_for_tuple(self, tup): @cache_readonly def is_unique(self): - """ return if the index has unique values """ + """ + Return if the index has unique values. + """ return self._engine.is_unique @property @@ -1855,7 +1892,9 @@ def _convert_list_indexer(self, keyarr, kind=None): return None def _invalid_indexer(self, form, key): - """ consistent invalid indexer message """ + """ + Consistent invalid indexer message. + """ raise TypeError("cannot do {form} indexing on {klass} with these " "indexers [{key}] of {kind}".format( form=form, klass=type(self), key=key, @@ -1958,11 +1997,15 @@ def _get_level_number(self, level): @cache_readonly def inferred_type(self): - """ return a string of the type inferred from the values """ + """ + Return a string of the type inferred from the values. 
+ """ return lib.infer_dtype(self) def _is_memory_usage_qualified(self): - """ return a boolean if we need a qualified .info display """ + """ + Return a boolean if we need a qualified .info display. + """ return self.is_object() def is_type_compatible(self, kind): @@ -1980,7 +2023,9 @@ def __reduce__(self): return _new_Index, (self.__class__, d), None def __setstate__(self, state): - """Necessary for making this object picklable""" + """ + Necessary for making this object picklable. + """ if isinstance(state, dict): self._data = state.pop('data') @@ -2014,7 +2059,7 @@ def __nonzero__(self): __bool__ = __nonzero__ _index_shared_docs['__contains__'] = """ - return a boolean if this key is IN the index + Return a boolean if this key is IN the index. Parameters ---------- @@ -2034,7 +2079,7 @@ def __contains__(self, key): return False _index_shared_docs['contains'] = """ - return a boolean if this key is IN the index + Return a boolean if this key is IN the index. Parameters ---------- @@ -2109,7 +2154,7 @@ def _can_hold_identifiers_and_holds_name(self, name): def append(self, other): """ - Append a collection of Index options together + Append a collection of Index options together. Parameters ---------- @@ -2146,13 +2191,13 @@ def _concat(self, to_concat, name): def _concat_same_dtype(self, to_concat, name): """ - Concatenate to_concat which has the same class + Concatenate to_concat which has the same class. """ # must be overridden in specific classes return _concat._concat_index_asobject(to_concat, name) _index_shared_docs['take'] = """ - return a new %(klass)s of the values selected by the indices + Return a new %(klass)s of the values selected by the indices. For internal compatibility with numpy arrays. @@ -2192,7 +2237,9 @@ def take(self, indices, axis=0, allow_fill=True, def _assert_take_fillable(self, values, indices, allow_fill=True, fill_value=None, na_value=np.nan): - """ Internal method to handle NA filling of take """ + """ + Internal method to handle NA filling of take. + """ indices = ensure_platform_int(indices) # only fill if we are passing a non-None fill_value @@ -2211,7 +2258,9 @@ def _assert_take_fillable(self, values, indices, allow_fill=True, @cache_readonly def _isnan(self): - """ return if each value is nan""" + """ + Return if each value is NaN. + """ if self._can_hold_na: return isna(self) else: @@ -2345,7 +2394,7 @@ def notna(self): def putmask(self, mask, value): """ - return a new Index of the values set with the mask + Return a new Index of the values set with the mask. See Also -------- @@ -2364,7 +2413,7 @@ def putmask(self, mask, value): def format(self, name=False, formatter=None, **kwargs): """ - Render a string representation of the Index + Render a string representation of the Index. """ header = [] if name: @@ -2430,7 +2479,9 @@ def to_native_types(self, slicer=None, **kwargs): return values._format_native_types(**kwargs) def _format_native_types(self, na_rep='', quoting=None, **kwargs): - """ actually format my specific types """ + """ + Actually format specific types of the index. + """ mask = isna(self) if not self.is_object() and not quoting: values = np.asarray(self).astype(str) @@ -2461,8 +2512,9 @@ def equals(self, other): return False def identical(self, other): - """Similar to equals, but check that other comparable attributes are - also equal + """ + Similar to equals, but check that other comparable attributes are + also equal. 
""" return (self.equals(other) and all((getattr(self, c, None) == getattr(other, c, None) @@ -2536,12 +2588,36 @@ def asof(self, label): def asof_locs(self, where, mask): """ - where : array of timestamps - mask : array of booleans where data is not NA + Finds the locations (indices) of the labels from the index for + every entry in the `where` argument. + + As in the `asof` function, if the label (a particular entry in + `where`) is not in the index, the latest index label upto the + passed label is chosen and its index returned. + + If all of the labels in the index are later than a label in `where`, + -1 is returned. + + `mask` is used to ignore NA values in the index during calculation. + + Parameters + ---------- + where : Index + An Index consisting of an array of timestamps. + mask : array-like + Array of booleans denoting where values in the original + data are not NA. + + Returns + ------- + numpy.ndarray + An array of locations (indices) of the labels from the Index + which correspond to the return values of the `asof` function + for every element in `where`. """ locs = self.values[mask].searchsorted(where.values, side='right') - locs = np.where(locs > 0, locs - 1, 0) + result = np.arange(len(self))[mask].take(locs) first = mask.argmax() @@ -2609,8 +2685,7 @@ def sort(self, *args, **kwargs): def sortlevel(self, level=None, ascending=True, sort_remaining=None): """ - - For internal compatibility with with the Index API + For internal compatibility with with the Index API. Sort the Index. This is for compat with MultiIndex @@ -2980,6 +3055,7 @@ def difference(self, other, sort=True): def symmetric_difference(self, other, result_name=None): """ Compute the symmetric difference of two Index objects. + It's sorted if sorting is possible. Parameters @@ -3136,7 +3212,7 @@ def get_loc(self, key, method=None, tolerance=None): def get_value(self, series, key): """ Fast lookup of value from 1-dimensional ndarray. Only use this if you - know what you're doing + know what you're doing. """ # if we have something that is Index-like, then @@ -3190,8 +3266,11 @@ def get_value(self, series, key): def set_value(self, arr, key, value): """ - Fast lookup of value from 1-dimensional ndarray. Only use this if you - know what you're doing + Fast lookup of value from 1-dimensional ndarray. + + Notes + ----- + Only use this if you know what you're doing. """ self._engine.set_value(com.values_from_object(arr), com.values_from_object(key), value) @@ -3240,8 +3319,10 @@ def _get_level_values(self, level): def droplevel(self, level=0): """ - Return index with requested level(s) removed. If resulting index has - only 1 level left, the result will be of Index type, not MultiIndex. + Return index with requested level(s) removed. + + If resulting index has only 1 level left, the result will be + of Index type, not MultiIndex. .. versionadded:: 0.23.1 (support for non-MultiIndex) @@ -3408,7 +3489,7 @@ def _get_fill_indexer(self, target, method, limit=None, tolerance=None): def _get_fill_indexer_searchsorted(self, target, method, limit=None): """ Fallback pad/backfill get_indexer that works for monotonic decreasing - indexes and non-monotonic targets + indexes and non-monotonic targets. 
""" if limit is not None: raise ValueError('limit argument for %r method only well-defined ' @@ -3501,8 +3582,10 @@ def get_indexer_non_unique(self, target): def get_indexer_for(self, target, **kwargs): """ - guaranteed return of an indexer even when non-unique - This dispatches to get_indexer or get_indexer_nonunique as appropriate + Guaranteed return of an indexer even when non-unique. + + This dispatches to get_indexer or get_indexer_nonunique + as appropriate. """ if self.is_unique: return self.get_indexer(target, **kwargs) @@ -3681,9 +3764,7 @@ def isin(self, values, level=None): def _can_reindex(self, indexer): """ - *this is an internal non-public method* - - Check if we are allowing reindexing with this particular indexer + Check if we are allowing reindexing with this particular indexer. Parameters ---------- @@ -3701,7 +3782,8 @@ def _can_reindex(self, indexer): def reindex(self, target, method=None, level=None, limit=None, tolerance=None): """ - Create index with target's values (move/add/delete values as necessary) + Create index with target's values (move/add/delete values + as necessary). Parameters ---------- @@ -3758,10 +3840,8 @@ def reindex(self, target, method=None, level=None, limit=None, def _reindex_non_unique(self, target): """ - *this is an internal non-public method* - Create a new index with target's values (move/add/delete values as - necessary) use with non-unique Index and a possibly non-unique target + necessary) use with non-unique Index and a possibly non-unique target. Parameters ---------- @@ -3818,8 +3898,6 @@ def _reindex_non_unique(self, target): return new_index, indexer, new_indexer _index_shared_docs['join'] = """ - *this is an internal non-public method* - Compute join_index and indexers to conform data structures to the new index. @@ -4035,16 +4113,18 @@ def _join_level(self, other, level, how='left', return_indexers=False, """ The join method *only* affects the level of the resulting MultiIndex. Otherwise it just exactly aligns the Index data to the - labels of the level in the MultiIndex. If `keep_order` == True, the - order of the data indexed by the MultiIndex will not be changed; - otherwise, it will tie out with `other`. + labels of the level in the MultiIndex. + + If ```keep_order == True```, the order of the data indexed by the + MultiIndex will not be changed; otherwise, it will tie out + with `other`. """ from .multi import MultiIndex def _get_leaf_sorter(labels): """ - returns sorter for the inner most level while preserving the - order of higher levels + Returns sorter for the inner most level while preserving the + order of higher levels. """ if labels[0].size == 0: return np.empty(0, dtype='int64') @@ -4266,8 +4346,8 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): def _maybe_cast_indexer(self, key): """ - If we have a float key and are not a floating index - then try to cast to an int if equivalent + If we have a float key and are not a floating index, then try to cast + to an int if equivalent. """ if is_float(key) and not self.is_floating(): @@ -4281,9 +4361,8 @@ def _maybe_cast_indexer(self, key): def _validate_indexer(self, form, key, kind): """ - if we are positional indexer - validate that we have appropriate typed bounds - must be an integer + If we are positional indexer, validate that we have appropriate + typed bounds must be an integer. 
""" assert kind in ['ix', 'loc', 'getitem', 'iloc'] @@ -4493,7 +4572,7 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): def delete(self, loc): """ - Make new Index with passed location(-s) deleted + Make new Index with passed location(-s) deleted. Returns ------- @@ -4503,8 +4582,9 @@ def delete(self, loc): def insert(self, loc, item): """ - Make new Index inserting new item at location. Follows - Python list.append semantics for negative values + Make new Index inserting new item at location. + + Follows Python list.append semantics for negative values. Parameters ---------- @@ -4522,7 +4602,7 @@ def insert(self, loc, item): def drop(self, labels, errors='raise'): """ - Make new Index with passed list of labels deleted + Make new Index with passed list of labels deleted. Parameters ---------- @@ -4762,7 +4842,9 @@ def _evaluate_with_datetime_like(self, other, op): @classmethod def _add_comparison_methods(cls): - """ add in comparison methods """ + """ + Add in comparison methods. + """ cls.__eq__ = _make_comparison_op(operator.eq, cls) cls.__ne__ = _make_comparison_op(operator.ne, cls) cls.__lt__ = _make_comparison_op(operator.lt, cls) @@ -4772,7 +4854,9 @@ def _add_comparison_methods(cls): @classmethod def _add_numeric_methods_add_sub_disabled(cls): - """ add in the numeric add/sub methods to disable """ + """ + Add in the numeric add/sub methods to disable. + """ cls.__add__ = make_invalid_op('__add__') cls.__radd__ = make_invalid_op('__radd__') cls.__iadd__ = make_invalid_op('__iadd__') @@ -4782,7 +4866,9 @@ def _add_numeric_methods_add_sub_disabled(cls): @classmethod def _add_numeric_methods_disabled(cls): - """ add in numeric methods to disable other than add/sub """ + """ + Add in numeric methods to disable other than add/sub. + """ cls.__pow__ = make_invalid_op('__pow__') cls.__rpow__ = make_invalid_op('__rpow__') cls.__mul__ = make_invalid_op('__mul__') @@ -4802,12 +4888,15 @@ def _add_numeric_methods_disabled(cls): cls.__inv__ = make_invalid_op('__inv__') def _maybe_update_attributes(self, attrs): - """ Update Index attributes (e.g. freq) depending on op """ + """ + Update Index attributes (e.g. freq) depending on op. + """ return attrs def _validate_for_numeric_unaryop(self, op, opstr): - """ validate if we can perform a numeric unary operation """ - + """ + Validate if we can perform a numeric unary operation. + """ if not self._is_numeric_dtype: raise TypeError("cannot evaluate a numeric op " "{opstr} for type: {typ}" @@ -4815,10 +4904,12 @@ def _validate_for_numeric_unaryop(self, op, opstr): def _validate_for_numeric_binop(self, other, op): """ - return valid other, evaluate or raise TypeError - if we are not of the appropriate type + Return valid other; evaluate or raise TypeError if we are not of + the appropriate type. - internal method called by ops + Notes + ----- + This is an internal method called by ops. """ opstr = '__{opname}__'.format(opname=op.__name__) # if we are an inheritor of numeric, @@ -4858,7 +4949,9 @@ def _validate_for_numeric_binop(self, other, op): @classmethod def _add_numeric_methods_binary(cls): - """ add in numeric methods """ + """ + Add in numeric methods. + """ cls.__add__ = _make_arithmetic_op(operator.add, cls) cls.__radd__ = _make_arithmetic_op(ops.radd, cls) cls.__sub__ = _make_arithmetic_op(operator.sub, cls) @@ -4880,8 +4973,9 @@ def _add_numeric_methods_binary(cls): @classmethod def _add_numeric_methods_unary(cls): - """ add in numeric unary methods """ - + """ + Add in numeric unary methods. 
+ """ def _make_evaluate_unary(op, opstr): def _evaluate_numeric_unary(self): @@ -4905,8 +4999,9 @@ def _add_numeric_methods(cls): @classmethod def _add_logical_methods(cls): - """ add in logical methods """ - + """ + Add in logical methods. + """ _doc = """ %(desc)s @@ -5010,7 +5105,9 @@ def logical_func(self, *args, **kwargs): @classmethod def _add_logical_methods_disabled(cls): - """ add in logical methods to disable """ + """ + Add in logical methods to disable. + """ cls.all = make_invalid_op('all') cls.any = make_invalid_op('any') @@ -5021,7 +5118,8 @@ def _add_logical_methods_disabled(cls): def ensure_index_from_sequences(sequences, names=None): - """Construct an index from sequences of data. + """ + Construct an index from sequences of data. A single sequence returns an Index. Many sequences returns a MultiIndex. @@ -5062,7 +5160,7 @@ def ensure_index_from_sequences(sequences, names=None): def ensure_index(index_like, copy=False): """ - Ensure that we have an index from some index-like object + Ensure that we have an index from some index-like object. Parameters ---------- @@ -5124,7 +5222,9 @@ def ensure_index(index_like, copy=False): def _ensure_has_len(seq): - """If seq is an iterator, put its values into a list.""" + """ + If seq is an iterator, put its values into a list. + """ try: len(seq) except TypeError: @@ -5135,7 +5235,7 @@ def _ensure_has_len(seq): def _trim_front(strings): """ - Trims zeros and decimal points + Trims zeros and decimal points. """ trimmed = strings while len(strings) > 0 and all(x[0] == ' ' for x in trimmed): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1179f6f39d06c9..0e2f7ceb24e94c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -34,7 +34,9 @@ class DatelikeOps(object): - """ common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex """ + """ + Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex. + """ def strftime(self, date_format): return Index(self.format(date_format=date_format), @@ -76,7 +78,9 @@ def strftime(self, date_format): class TimelikeOps(object): - """ common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex """ + """ + Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex. + """ _round_doc = ( """ @@ -257,7 +261,9 @@ def equals(self, other): @staticmethod def _join_i8_wrapper(joinf, dtype, with_indexers=True): - """ create the join wrapper methods """ + """ + Create the join wrapper methods. + """ @staticmethod def wrapper(left, right): @@ -287,7 +293,7 @@ def _evaluate_compare(self, other, op): def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise', from_utc=False): """ - ensure that we are re-localized + Ensure that we are re-localized. This is for compat as we can then call this on all datetimelike indexes generally (ignored for Period/Timedelta) @@ -320,7 +326,7 @@ def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise', def _box_values_as_index(self): """ - return object Index which contains boxed values + Return object Index which contains boxed values. """ from pandas.core.index import Index return Index(self._box_values(self.asi8), name=self.name, dtype=object) @@ -357,7 +363,7 @@ def map(self, f): def sort_values(self, return_indexer=False, ascending=True): """ - Return sorted copy of Index + Return sorted copy of Index. 
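A brief doctest would help here as well; a sketch, assuming the usual `DatetimeIndex` repr of this era:

+
+        Examples
+        --------
+        >>> idx = pd.DatetimeIndex(['2018-01-03', '2018-01-01'])
+        >>> idx.sort_values()
+        DatetimeIndex(['2018-01-01', '2018-01-03'], dtype='datetime64[ns]', freq=None)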
""" if return_indexer: _as = self.argsort() @@ -411,7 +417,8 @@ def take(self, indices, axis=0, allow_fill=True, @property def asobject(self): - """Return object Index which contains boxed values. + """ + Return object Index which contains boxed values. .. deprecated:: 0.23.0 Use ``astype(object)`` instead. @@ -431,7 +438,7 @@ def _convert_tolerance(self, tolerance, target): def tolist(self): """ - return a list of the underlying data + Return a list of the underlying data. """ return list(self.astype(object)) @@ -466,6 +473,7 @@ def min(self, axis=None, *args, **kwargs): def argmin(self, axis=None, *args, **kwargs): """ Returns the indices of the minimum values along an axis. + See `numpy.ndarray.argmin` for more information on the `axis` parameter. @@ -516,6 +524,7 @@ def max(self, axis=None, *args, **kwargs): def argmax(self, axis=None, *args, **kwargs): """ Returns the indices of the maximum values along an axis. + See `numpy.ndarray.argmax` for more information on the `axis` parameter. @@ -541,7 +550,7 @@ def _formatter_func(self): def _format_attrs(self): """ - Return a list of tuples of the (attr,formatted_value) + Return a list of tuples of the (attr,formatted_value). """ attrs = super(DatetimeIndexOpsMixin, self)._format_attrs() for attrib in self._attributes: @@ -554,8 +563,8 @@ def _format_attrs(self): def _convert_scalar_indexer(self, key, kind=None): """ - we don't allow integer or float indexing on datetime-like when using - loc + We don't allow integer or float indexing on datetime-like when using + loc. Parameters ---------- @@ -581,8 +590,8 @@ def _convert_scalar_indexer(self, key, kind=None): @classmethod def _add_datetimelike_methods(cls): """ - add in the datetimelike methods (as we may have to override the - superclass) + Add in the datetimelike methods (as we may have to override the + superclass). """ def __add__(self, other): @@ -613,7 +622,7 @@ def __rsub__(self, other): def isin(self, values): """ Compute boolean array of whether each index value is found in the - passed set of values + passed set of values. Parameters ---------- @@ -633,7 +642,7 @@ def isin(self, values): def repeat(self, repeats, *args, **kwargs): """ - Analogous to ndarray.repeat + Analogous to ndarray.repeat. """ nv.validate_repeat(args, kwargs) if is_period_dtype(self): @@ -654,7 +663,7 @@ def where(self, cond, other=None): def _summary(self, name=None): """ - Return a summarized representation + Return a summarized representation. Parameters ---------- @@ -685,7 +694,7 @@ def _summary(self, name=None): def _concat_same_dtype(self, to_concat, name): """ - Concatenate to_concat which has the same class + Concatenate to_concat which has the same class. """ attribs = self._get_attributes_dict() attribs['name'] = name @@ -742,7 +751,7 @@ def _time_shift(self, periods, freq=None): def _ensure_datetimelike_to_i8(other, to_utc=False): """ - helper for coercing an input scalar or array to i8 + Helper for coercing an input scalar or array to i8. Parameters ---------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f9483b48b5261c..ea6dfa6a3a6aff 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -121,7 +121,7 @@ def _codes_to_ints(self, codes): class MultiIndex(Index): """ - A multi-level, or hierarchical, index object for pandas objects + A multi-level, or hierarchical, index object for pandas objects. 
Parameters ---------- @@ -1195,14 +1195,14 @@ def to_frame(self, index=True, name=None): def to_hierarchical(self, n_repeat, n_shuffle=1): """ - .. deprecated:: 0.24.0 - Return a MultiIndex reshaped to conform to the shapes given by n_repeat and n_shuffle. Useful to replicate and rearrange a MultiIndex for combination with another Index with n_repeat items. + .. deprecated:: 0.24.0 + Parameters ---------- n_repeat : int diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0d4e7aaebeca5f..d6286244fcb7ec 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -122,7 +122,7 @@ def ensure_int(value, field): @classmethod def from_range(cls, data, name=None, dtype=None, **kwargs): - """ create RangeIndex from a range (py3), or xrange (py2) object """ + """ Create RangeIndex from a range (py3), or xrange (py2) object. """ if not isinstance(data, range): raise TypeError( '{0}(...) must be called with object coercible to a ' diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 90016f599addc5..b976dc27a69f70 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -48,7 +48,7 @@ def _ensure_like_indices(time, panels): """ - Makes sure that time and panels are conformable + Makes sure that time and panels are conformable. """ n_time = len(time) n_panel = len(panels) @@ -63,7 +63,7 @@ def _ensure_like_indices(time, panels): def panel_index(time, panels, names=None): """ - Returns a multi-index suitable for a panel-like DataFrame + Returns a multi-index suitable for a panel-like DataFrame. Parameters ---------- @@ -157,7 +157,7 @@ def __init__(self, data=None, items=None, major_axis=None, minor_axis=None, def _init_data(self, data, copy, dtype, **kwargs): """ Generate ND initialization; axes are passed - as required objects to __init__ + as required objects to __init__. """ if data is None: data = {} @@ -242,7 +242,7 @@ def _init_arrays(self, arrays, arr_names, axes): @classmethod def from_dict(cls, data, intersect=False, orient='items', dtype=None): """ - Construct Panel from dict of DataFrame objects + Construct Panel from dict of DataFrame objects. Parameters ---------- @@ -348,7 +348,7 @@ def _compare_constructor(self, other, func): def __unicode__(self): """ - Return a string representation for a particular Panel + Return a string representation for a particular Panel. Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. @@ -377,7 +377,7 @@ def _get_plane_axes_index(self, axis): """ Get my plane axes indexes: these are already (as compared with higher level planes), - as we are returning a DataFrame axes indexes + as we are returning a DataFrame axes indexes. """ axis_name = self._get_axis_name(axis) @@ -397,7 +397,7 @@ def _get_plane_axes(self, axis): """ Get my plane axes indexes: these are already (as compared with higher level planes), - as we are returning a DataFrame axes + as we are returning a DataFrame axes. """ return [self._get_axis(axi) for axi in self._get_plane_axes_index(axis)] @@ -409,14 +409,14 @@ def to_sparse(self, *args, **kwargs): NOT IMPLEMENTED: do not call this method, as sparsifying is not supported for Panel objects and will raise an error. - Convert to SparsePanel + Convert to SparsePanel. """ raise NotImplementedError("sparsifying is not supported " "for Panel objects") def to_excel(self, path, na_rep='', engine=None, **kwargs): """ - Write each DataFrame in Panel to a separate excel sheet + Write each DataFrame in Panel to a separate excel sheet. 
Parameters ---------- @@ -473,7 +473,8 @@ def as_matrix(self): # Getting and setting elements def get_value(self, *args, **kwargs): - """Quickly retrieve single value at (item, major, minor) location + """ + Quickly retrieve single value at (item, major, minor) location. .. deprecated:: 0.21.0 @@ -520,7 +521,8 @@ def _get_value(self, *args, **kwargs): _get_value.__doc__ = get_value.__doc__ def set_value(self, *args, **kwargs): - """Quickly set single value at (item, major, minor) location + """ + Quickly set single value at (item, major, minor) location. .. deprecated:: 0.21.0 @@ -619,7 +621,9 @@ def __setitem__(self, key, value): NDFrame._set_item(self, key, mat) def _unpickle_panel_compat(self, state): # pragma: no cover - "Unpickle the panel" + """ + Unpickle the panel. + """ from pandas.io.pickle import _unpickle_array _unpickle = _unpickle_array @@ -687,7 +691,9 @@ def round(self, decimals=0, *args, **kwargs): raise TypeError("decimals must be an integer") def _needs_reindex_multi(self, axes, method, level): - """ don't allow a multi reindex on Panel or above ndim """ + """ + Don't allow a multi reindex on Panel or above ndim. + """ return False def align(self, other, **kwargs): @@ -695,7 +701,7 @@ def align(self, other, **kwargs): def dropna(self, axis=0, how='any', inplace=False): """ - Drop 2D from panel, holding passed axis constant + Drop 2D from panel, holding passed axis constant. Parameters ---------- @@ -787,7 +793,7 @@ def _combine_panel(self, other, func): def major_xs(self, key): """ - Return slice of panel along major axis + Return slice of panel along major axis. Parameters ---------- @@ -811,7 +817,7 @@ def major_xs(self, key): def minor_xs(self, key): """ - Return slice of panel along minor axis + Return slice of panel along minor axis. Parameters ---------- @@ -835,7 +841,7 @@ def minor_xs(self, key): def xs(self, key, axis=1): """ - Return slice of panel along selected axis + Return slice of panel along selected axis. Parameters ---------- @@ -871,6 +877,8 @@ def xs(self, key, axis=1): def _ixs(self, i, axis=0): """ + Parameters + ---------- i : int, slice, or sequence of integers axis : int """ @@ -898,7 +906,7 @@ def _ixs(self, i, axis=0): def groupby(self, function, axis='major'): """ - Group data on given axis, returning GroupBy object + Group data on given axis, returning GroupBy object. Parameters ---------- @@ -993,7 +1001,7 @@ def construct_index_parts(idx, major=True): def apply(self, func, axis='major', **kwargs): """ - Applies function along axis (or axes) of the Panel + Applies function along axis (or axes) of the Panel. Parameters ---------- @@ -1115,8 +1123,9 @@ def _apply_1d(self, func, axis): return self._construct_return_type(results, planes) def _apply_2d(self, func, axis): - """ handle 2-d slices, equiv to iterating over the other axis """ - + """ + Handle 2-d slices, equiv to iterating over the other axis. + """ ndim = self.ndim axis = [self._get_axis_number(a) for a in axis] @@ -1172,7 +1181,9 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, return self._construct_return_type(result, axes) def _construct_return_type(self, result, axes=None): - """ return the type for the ndim of the result """ + """ + Return the type for the ndim of the result. + """ ndim = getattr(result, 'ndim', None) # need to assume they are the same @@ -1308,6 +1319,7 @@ def count(self, axis='major'): def shift(self, periods=1, freq=None, axis='major'): """ Shift index by desired number of periods with an optional time freq. 
+
         The shifted data will not include the dropped periods and the
         shifted axis will be smaller than the original. This is different
         from the behavior of DataFrame.shift()

@@ -1333,7 +1345,7 @@ def tshift(self, periods=1, freq=None, axis='major'):

     def join(self, other, how='left', lsuffix='', rsuffix=''):
         """
-        Join items with other Panel either on major and minor axes column
+        Join items with other Panel either on major and minor axes column.

         Parameters
         ----------
@@ -1440,13 +1452,17 @@ def _get_join_index(self, other, how):
     # miscellaneous data creation
     @staticmethod
     def _extract_axes(self, data, axes, **kwargs):
-        """ return a list of the axis indices """
+        """
+        Return a list of the axis indices.
+        """
         return [self._extract_axis(self, data, axis=i, **kwargs)
                 for i, a in enumerate(axes)]

     @staticmethod
     def _extract_axes_for_slice(self, axes):
-        """ return the slice dictionary for these axes """
+        """
+        Return the slice dictionary for these axes.
+        """
         return {self._AXIS_SLICEMAP[i]: a for i, a
                 in zip(self._AXIS_ORDERS[self._AXIS_LEN - len(axes):], axes)}
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index 125b441e5558a5..1e3fa166a9b4c6 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -81,7 +81,9 @@ def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs):
         self.groupby._set_grouper(self._convert_obj(obj), sort=True)

     def __unicode__(self):
-        """ provide a nice str repr of our rolling object """
+        """
+        Provide a nice str repr of our rolling object.
+        """
         attrs = ["{k}={v}".format(k=k, v=getattr(self.groupby, k))
                  for k in self._attributes if
                  getattr(self.groupby, k, None) is not None]
@@ -100,7 +102,7 @@ def __getattr__(self, attr):

     def __iter__(self):
         """
-        Resampler iterator
+        Resampler iterator.

         Returns
         -------
@@ -124,14 +126,18 @@ def ax(self):

     @property
     def _typ(self):
-        """ masquerade for compat as a Series or a DataFrame """
+        """
+        Masquerade for compat as a Series or a DataFrame.
+        """
         if isinstance(self._selected_obj, pd.Series):
             return 'series'
         return 'dataframe'

     @property
     def _from_selection(self):
-        """ is the resampling from a DataFrame column or MultiIndex level """
+        """
+        Is the resampling from a DataFrame column or MultiIndex level.
+        """
         # upsampling and PeriodIndex resampling do not work
         # with selection, this state used to catch and raise an error
         return (self.groupby is not None and
@@ -140,7 +146,7 @@ def _from_selection(self):

     def _convert_obj(self, obj):
         """
-        provide any conversions for the object in order to correctly handle
+        Provide any conversions for the object so it is handled correctly.

         Parameters
         ----------
@@ -158,17 +164,17 @@ def _get_binner_for_time(self):

     def _set_binner(self):
         """
-        setup our binners
-        cache these as we are an immutable object
-        """
+        Set up our binners.
+
+        Cache these as we are an immutable object.
+        """
         if self.binner is None:
             self.binner, self.grouper = self._get_binner()

     def _get_binner(self):
         """
-        create the BinGrouper, assume that self.set_grouper(obj)
-        has already been called
+        Create the BinGrouper, assuming that self.set_grouper(obj)
+        has already been called.
         """
         binner, bins, binlabels = self._get_binner_for_time()
@@ -176,28 +182,31 @@ def _get_binner(self):
         return binner, bin_grouper

     def _assure_grouper(self):
-        """ make sure that we are creating our binner & grouper """
+        """
+        Make sure that we are creating our binner & grouper.
+        """
         self._set_binner()

     @Substitution(klass='Resampler',
                   versionadded='..
versionadded:: 0.23.0', examples=""" ->>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, -... index=pd.date_range('2012-08-02', periods=4)) ->>> df - A -2012-08-02 1 -2012-08-03 2 -2012-08-04 3 -2012-08-05 4 - -To get the difference between each 2-day period's maximum and minimum value in -one pass, you can do - ->>> df.resample('2D').pipe(lambda x: x.max() - x.min()) - A -2012-08-02 1 -2012-08-04 1""") + >>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, + ... index=pd.date_range('2012-08-02', periods=4)) + >>> df + A + 2012-08-02 1 + 2012-08-03 2 + 2012-08-04 3 + 2012-08-05 4 + + To get the difference between each 2-day period's maximum and minimum + value in one pass, you can do + + >>> df.resample('2D').pipe(lambda x: x.max() - x.min()) + A + 2012-08-02 1 + 2012-08-04 1 + """) @Appender(_pipe_template) def pipe(self, func, *args, **kwargs): return super(Resampler, self).pipe(func, *args, **kwargs) @@ -270,7 +279,7 @@ def aggregate(self, func, *args, **kwargs): def transform(self, arg, *args, **kwargs): """ Call function producing a like-indexed Series on each group and return - a Series with the transformed values + a Series with the transformed values. Parameters ---------- @@ -296,8 +305,7 @@ def _upsample(self, f, limit=None, fill_value=None): def _gotitem(self, key, ndim, subset=None): """ - sub-classes to define - return a sliced object + Sub-classes to define. Return a sliced object. Parameters ---------- @@ -320,7 +328,9 @@ def _gotitem(self, key, ndim, subset=None): return grouped def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): - """ re-evaluate the obj with a groupby aggregation """ + """ + Re-evaluate the obj with a groupby aggregation. + """ if grouper is None: self._set_binner() @@ -352,7 +362,7 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): def _apply_loffset(self, result): """ - if loffset is set, offset the result index + If loffset is set, offset the result index. This is NOT an idempotent routine, it will be applied exactly once to the result. @@ -377,11 +387,15 @@ def _apply_loffset(self, result): return result def _get_resampler_for_grouping(self, groupby, **kwargs): - """ return the correct class for resampling with groupby """ + """ + Return the correct class for resampling with groupby. + """ return self._resampler_for_grouping(self, groupby=groupby, **kwargs) def _wrap_result(self, result): - """ potentially wrap any results """ + """ + Potentially wrap any results. + """ if isinstance(result, ABCSeries) and self._selection is not None: result.name = self._selection @@ -394,7 +408,7 @@ def _wrap_result(self, result): def pad(self, limit=None): """ - Forward fill the values + Forward fill the values. Parameters ---------- @@ -757,8 +771,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, def asfreq(self, fill_value=None): """ - return the values at the new freq, - essentially a reindex + Return the values at the new freq, essentially a reindex. Parameters ---------- @@ -777,7 +790,7 @@ def asfreq(self, fill_value=None): def std(self, ddof=1, *args, **kwargs): """ - Compute standard deviation of groups, excluding missing values + Compute standard deviation of groups, excluding missing values. Parameters ---------- @@ -789,12 +802,12 @@ def std(self, ddof=1, *args, **kwargs): def var(self, ddof=1, *args, **kwargs): """ - Compute variance of groups, excluding missing values + Compute variance of groups, excluding missing values. 
Parameters ---------- ddof : integer, default 1 - degrees of freedom + degrees of freedom """ nv.validate_resampler_func('var', args, kwargs) return self._downsample('var', ddof=ddof) @@ -863,8 +876,10 @@ def f(self, _method=method): def _maybe_process_deprecations(r, how=None, fill_method=None, limit=None): - """ potentially we might have a deprecation warning, show it - but call the appropriate methods anyhow """ + """ + Potentially we might have a deprecation warning, show it + but call the appropriate methods anyhow. + """ if how is not None: @@ -909,8 +924,9 @@ def _maybe_process_deprecations(r, how=None, fill_method=None, limit=None): class _GroupByMixin(GroupByMixin): - """ provide the groupby facilities """ - + """ + Provide the groupby facilities. + """ def __init__(self, obj, *args, **kwargs): parent = kwargs.pop('parent', None) @@ -931,8 +947,8 @@ def __init__(self, obj, *args, **kwargs): def _apply(self, f, grouper=None, *args, **kwargs): """ - dispatch to _upsample; we are stripping all of the _upsample kwargs and - performing the original function call on the grouped object + Dispatch to _upsample; we are stripping all of the _upsample kwargs and + performing the original function call on the grouped object. """ def func(x): @@ -966,7 +982,7 @@ def _get_binner_for_time(self): def _downsample(self, how, **kwargs): """ - Downsample the cython defined function + Downsample the cython defined function. Parameters ---------- @@ -1003,6 +1019,7 @@ def _downsample(self, how, **kwargs): def _adjust_binner_for_upsample(self, binner): """ Adjust our binner when upsampling. + The range of a new index should not be outside specified range """ if self.closed == 'right': @@ -1013,6 +1030,8 @@ def _adjust_binner_for_upsample(self, binner): def _upsample(self, method, limit=None, fill_value=None): """ + Parameters + ---------- method : string {'backfill', 'bfill', 'pad', 'ffill', 'asfreq'} method for upsampling limit : int, default None @@ -1065,7 +1084,6 @@ class DatetimeIndexResamplerGroupby(_GroupByMixin, DatetimeIndexResampler): Provides a resample of a groupby implementation .. versionadded:: 0.18.1 - """ @property def _constructor(self): @@ -1106,7 +1124,7 @@ def _convert_obj(self, obj): def _downsample(self, how, **kwargs): """ - Downsample the cython defined function + Downsample the cython defined function. Parameters ---------- @@ -1143,6 +1161,8 @@ def _downsample(self, how, **kwargs): def _upsample(self, method, limit=None, fill_value=None): """ + Parameters + ---------- method : string {'backfill', 'bfill', 'pad', 'ffill'} method for upsampling limit : int, default None @@ -1177,10 +1197,9 @@ def _upsample(self, method, limit=None, fill_value=None): class PeriodIndexResamplerGroupby(_GroupByMixin, PeriodIndexResampler): """ - Provides a resample of a groupby implementation + Provides a resample of a groupby implementation. .. versionadded:: 0.18.1 - """ @property def _constructor(self): @@ -1199,6 +1218,7 @@ def _get_binner_for_time(self): def _adjust_binner_for_upsample(self, binner): """ Adjust our binner when upsampling. + The range of a new index is allowed to be greater than original range so we don't need to change the length of a binner, GH 13022 """ @@ -1207,10 +1227,9 @@ def _adjust_binner_for_upsample(self, binner): class TimedeltaIndexResamplerGroupby(_GroupByMixin, TimedeltaIndexResampler): """ - Provides a resample of a groupby implementation + Provides a resample of a groupby implementation. .. 
versionadded:: 0.18.1 - """ @property def _constructor(self): @@ -1218,7 +1237,9 @@ def _constructor(self): def resample(obj, kind=None, **kwds): - """ create a TimeGrouper and return our resampler """ + """ + Create a TimeGrouper and return our resampler. + """ tg = TimeGrouper(**kwds) return tg._get_resampler(obj, kind=kind) @@ -1228,7 +1249,9 @@ def resample(obj, kind=None, **kwds): def get_resampler_for_grouping(groupby, rule, how=None, fill_method=None, limit=None, kind=None, **kwargs): - """ return our appropriate resampler when grouping as well """ + """ + Return our appropriate resampler when grouping as well. + """ # .resample uses 'on' similar to how .groupby uses 'key' kwargs['key'] = kwargs.pop('on', None) @@ -1244,7 +1267,7 @@ def get_resampler_for_grouping(groupby, rule, how=None, fill_method=None, class TimeGrouper(Grouper): """ - Custom groupby class for time-interval grouping + Custom groupby class for time-interval grouping. Parameters ---------- @@ -1311,7 +1334,7 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean', def _get_resampler(self, obj, kind=None): """ - return my resampler or raise if we have an invalid axis + Return my resampler or raise if we have an invalid axis. Parameters ---------- @@ -1643,7 +1666,7 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None): """ - Utility frequency conversion method for Series/DataFrame + Utility frequency conversion method for Series/DataFrame. """ if isinstance(obj.index, PeriodIndex): if method is not None: diff --git a/pandas/core/series.py b/pandas/core/series.py index c9b1a2c45eab38..4b8274a9e8333c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -75,7 +75,8 @@ # see gh-16971 def remove_na(arr): - """Remove null values from array like structure. + """ + Remove null values from array like structure. .. deprecated:: 0.21.0 Use s[s.notnull()] instead. @@ -87,7 +88,9 @@ def remove_na(arr): def _coerce_method(converter): - """ install the scalar coercion methods """ + """ + Install the scalar coercion methods. + """ def wrapper(self): if len(self) == 1: @@ -310,7 +313,8 @@ def _init_dict(self, data, index=None, dtype=None): @classmethod def from_array(cls, arr, index=None, name=None, dtype=None, copy=False, fastpath=False): - """Construct Series from array. + """ + Construct Series from array. .. deprecated :: 0.23.0 Use pd.Series(..) constructor instead. @@ -341,7 +345,9 @@ def _can_hold_na(self): _index = None def _set_axis(self, axis, labels, fastpath=False): - """ override generic, we want to set the _typ here """ + """ + Override generic, we want to set the _typ here. + """ if not fastpath: labels = ensure_index(labels) @@ -418,8 +424,7 @@ def ftypes(self): @property def values(self): """ - Return Series as ndarray or ndarray-like - depending on the dtype + Return Series as ndarray or ndarray-like depending on the dtype. Returns ------- @@ -455,8 +460,9 @@ def _values(self): return self._data.internal_values() def _formatting_values(self): - """Return the values that can be formatted (used by SeriesFormatter - and DataFrameFormatter) + """ + Return the values that can be formatted (used by SeriesFormatter + and DataFrameFormatter). """ return self._data.formatting_values() @@ -468,7 +474,8 @@ def get_values(self): @property def asobject(self): - """Return object Series which contains boxed values. + """ + Return object Series which contains boxed values. .. 
deprecated :: 0.23.0 @@ -483,7 +490,7 @@ def asobject(self): # ops def ravel(self, order='C'): """ - Return the flattened underlying data as an ndarray + Return the flattened underlying data as an ndarray. See Also -------- @@ -493,7 +500,7 @@ def ravel(self, order='C'): def compress(self, condition, *args, **kwargs): """ - Return selected slices of an array along given axis as a Series + Return selected slices of an array along given axis as a Series. .. deprecated:: 0.24.0 @@ -510,7 +517,7 @@ def compress(self, condition, *args, **kwargs): def nonzero(self): """ - Return the *integer* indices of the elements that are non-zero + Return the *integer* indices of the elements that are non-zero. This method is equivalent to calling `numpy.nonzero` on the series data. For compatibility with NumPy, the return value is @@ -545,8 +552,7 @@ def nonzero(self): def put(self, *args, **kwargs): """ - Applies the `put` method to its `values` attribute - if it has one. + Applies the `put` method to its `values` attribute if it has one. See Also -------- @@ -556,7 +562,7 @@ def put(self, *args, **kwargs): def __len__(self): """ - return the length of the Series + Return the length of the Series. """ return len(self._data) @@ -631,20 +637,20 @@ def view(self, dtype=None): def __array__(self, result=None): """ - the array interface, return my values + The array interface, return my values. """ return self.get_values() def __array_wrap__(self, result, context=None): """ - Gets called after a ufunc + Gets called after a ufunc. """ return self._constructor(result, index=self.index, copy=False).__finalize__(self) def __array_prepare__(self, result, context=None): """ - Gets called prior to a ufunc + Gets called prior to a ufunc. """ # nice error message for non-ufunc types @@ -713,12 +719,14 @@ def _unpickle_series_compat(self, state): # indexers @property def axes(self): - """Return a list of the row axis labels""" + """ + Return a list of the row axis labels. + """ return [self.index] def _ixs(self, i, axis=0): """ - Return the i-th value or values in the Series by location + Return the i-th value or values in the Series by location. Parameters ---------- @@ -1013,7 +1021,8 @@ def repeat(self, repeats, *args, **kwargs): index=new_index).__finalize__(self) def get_value(self, label, takeable=False): - """Quickly retrieve single value at passed index label + """ + Quickly retrieve single value at passed index label. .. deprecated:: 0.21.0 Please use .at[] or .iat[] accessors. @@ -1040,9 +1049,11 @@ def _get_value(self, label, takeable=False): _get_value.__doc__ = get_value.__doc__ def set_value(self, label, value, takeable=False): - """Quickly set single value at passed label. If label is not contained, - a new object is created with the label placed at the end of the result - index. + """ + Quickly set single value at passed label. + + If label is not contained, a new object is created with the label + placed at the end of the result index. .. deprecated:: 0.21.0 Please use .at[] or .iat[] accessors. @@ -1215,7 +1226,7 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): def __unicode__(self): """ - Return a string representation for a particular DataFrame + Return a string representation for a particular DataFrame. Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. 
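A hypothetical doctest (default display options assumed) may anchor what this string representation looks like:

>>> s = pd.Series([1, 2, 3])
>>> print(s.to_string())
0    1
1    2
2    3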
@@ -1236,7 +1247,7 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, index=True, length=False, dtype=False, name=False, max_rows=None): """ - Render a string representation of the Series + Render a string representation of the Series. Parameters ---------- @@ -1290,7 +1301,7 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, def iteritems(self): """ - Lazily iterate over (index, value) tuples + Lazily iterate over (index, value) tuples. """ return zip(iter(self.index), iter(self)) @@ -1300,7 +1311,9 @@ def iteritems(self): # Misc public methods def keys(self): - """Alias for index""" + """ + Alias for index. + """ return self.index def to_dict(self, into=dict): @@ -1339,7 +1352,7 @@ def to_dict(self, into=dict): def to_frame(self, name=None): """ - Convert Series to DataFrame + Convert Series to DataFrame. Parameters ---------- @@ -1360,7 +1373,7 @@ def to_frame(self, name=None): def to_sparse(self, kind='block', fill_value=None): """ - Convert Series to SparseSeries + Convert Series to SparseSeries. Parameters ---------- @@ -1401,7 +1414,7 @@ def _set_name(self, name, inplace=False): def count(self, level=None): """ - Return number of non-NA/null observations in the Series + Return number of non-NA/null observations in the Series. Parameters ---------- @@ -1433,7 +1446,8 @@ def count(self, level=None): dtype='int64').__finalize__(self) def mode(self, dropna=True): - """Return the mode(s) of the dataset. + """ + Return the mode(s) of the dataset. Always returns Series even if only one value is returned. @@ -1796,7 +1810,7 @@ def idxmax(self, axis=0, skipna=True, *args, **kwargs): # ndarray compat argmin = deprecate( 'argmin', idxmin, '0.21.0', - msg=dedent("""\ + msg=dedent(""" The current behaviour of 'Series.argmin' is deprecated, use 'idxmin' instead. The behavior of 'argmin' will be corrected to return the positional @@ -1806,7 +1820,7 @@ def idxmax(self, axis=0, skipna=True, *args, **kwargs): ) argmax = deprecate( 'argmax', idxmax, '0.21.0', - msg=dedent("""\ + msg=dedent(""" The current behaviour of 'Series.argmax' is deprecated, use 'idxmax' instead. The behavior of 'argmax' will be corrected to return the positional @@ -1899,7 +1913,7 @@ def quantile(self, q=0.5, interpolation='linear'): def corr(self, other, method='pearson', min_periods=None): """ - Compute correlation with `other` Series, excluding missing values + Compute correlation with `other` Series, excluding missing values. Parameters ---------- @@ -1942,7 +1956,7 @@ def corr(self, other, method='pearson', min_periods=None): def cov(self, other, min_periods=None): """ - Compute covariance with Series, excluding missing values + Compute covariance with Series, excluding missing values. Parameters ---------- @@ -2149,11 +2163,15 @@ def dot(self, other): raise TypeError('unsupported type: %s' % type(other)) def __matmul__(self, other): - """ Matrix multiplication using binary `@` operator in Python>=3.5 """ + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ return self.dot(other) def __rmatmul__(self, other): - """ Matrix multiplication using binary `@` operator in Python>=3.5 """ + """ + Matrix multiplication using binary `@` operator in Python>=3.5. 
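A hypothetical doctest (Python >= 3.5 only) could round this out; the scalar result below comes from an ordinary dot product:

+
+        Examples
+        --------
+        >>> s = pd.Series([1, 2, 3])
+        >>> [1, 1, 1] @ s
+        6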
+ """ return self.dot(np.transpose(other)) @Substitution(klass='Series') @@ -2250,7 +2268,7 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): def _binop(self, other, func, level=None, fill_value=None): """ - Perform generic binary operation with optional fill value + Perform generic binary operation with optional fill value. Parameters ---------- @@ -2357,7 +2375,7 @@ def combine(self, other, func, fill_value=None): eagle 200.0 falcon 345.0 dtype: float64 -""" + """ if fill_value is None: fill_value = na_value_for_dtype(self.dtype, compat=False) @@ -2439,7 +2457,7 @@ def combine_first(self, other): def update(self, other): """ Modify Series in place using non-NA values from passed - Series. Aligns on index + Series. Aligns on index. Parameters ---------- @@ -2814,7 +2832,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, def argsort(self, axis=0, kind='quicksort', order=None): """ Overrides ndarray.argsort. Argsorts the value, omitting NA/null values, - and places the result in the same locations as the non-NA values + and places the result in the same locations as the non-NA values. Parameters ---------- @@ -3040,7 +3058,7 @@ def nsmallest(self, n=5, keep='first'): def swaplevel(self, i=-2, j=-1, copy=True): """ - Swap levels i and j in a MultiIndex + Swap levels i and j in a MultiIndex. Parameters ---------- @@ -3062,8 +3080,9 @@ def swaplevel(self, i=-2, j=-1, copy=True): def reorder_levels(self, order): """ - Rearrange index levels using input order. May not drop or duplicate - levels + Rearrange index levels using input order. + + May not drop or duplicate levels. Parameters ---------- @@ -3208,8 +3227,7 @@ def map(self, arg, na_action=None): def _gotitem(self, key, ndim, subset=None): """ - sub-classes to define - return a sliced object + Sub-classes to define. Return a sliced object. Parameters ---------- @@ -3290,8 +3308,8 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): """ Invoke function on values of Series. - Can be ufunc (a NumPy function that applies to the entire Series) or a - Python function that only works on single values. + Can be ufunc (a NumPy function that applies to the entire Series) + or a Python function that only works on single values. Parameters ---------- @@ -3423,11 +3441,10 @@ def f(x): def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): """ - perform a reduction operation - - if we have an ndarray as a value, then simply perform the operation, - otherwise delegate to the object + Perform a reduction operation. + If we have an ndarray as a value, then simply perform the operation, + otherwise delegate to the object. """ delegate = self._values @@ -3464,8 +3481,9 @@ def _reindex_indexer(self, new_index, indexer, copy): return self._constructor(new_values, index=new_index) def _needs_reindex_multi(self, axes, method, level): - """ check if we do need a multi reindex; this is for compat with - higher dims + """ + Check if we do need a multi reindex; this is for compat with + higher dims. """ return False @@ -3480,7 +3498,8 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, broadcast_axis=broadcast_axis) def rename(self, index=None, **kwargs): - """Alter Series index labels or name + """ + Alter Series index labels or name. Function / dict values must be unique (1-to-1). Labels not contained in a dict / Series will be left as-is. 
Extra labels listed don't throw an @@ -3664,7 +3683,8 @@ def shift(self, periods=1, freq=None, axis=0): return super(Series, self).shift(periods=periods, freq=freq, axis=axis) def reindex_axis(self, labels, axis=0, **kwargs): - """Conform Series to new index with optional filling logic. + """ + Conform Series to new index with optional filling logic. .. deprecated:: 0.21.0 Use ``Series.reindex`` instead. @@ -3893,7 +3913,8 @@ def between(self, left, right, inclusive=True): @classmethod def from_csv(cls, path, sep=',', parse_dates=True, header=None, index_col=0, encoding=None, infer_datetime_format=False): - """Read CSV file. + """ + Read CSV file. .. deprecated:: 0.21.0 Use :func:`pandas.read_csv` instead. @@ -4123,7 +4144,8 @@ def dropna(self, axis=0, inplace=False, **kwargs): return self.copy() def valid(self, inplace=False, **kwargs): - """Return Series without null values. + """ + Return Series without null values. .. deprecated:: 0.23.0 Use :meth:`Series.dropna` instead. @@ -4137,7 +4159,7 @@ def valid(self, inplace=False, **kwargs): def to_timestamp(self, freq=None, how='start', copy=True): """ - Cast to datetimeindex of timestamps, at *beginning* of period + Cast to datetimeindex of timestamps, at *beginning* of period. Parameters ---------- @@ -4162,7 +4184,7 @@ def to_timestamp(self, freq=None, how='start', copy=True): def to_period(self, freq=None, copy=True): """ Convert Series from DatetimeIndex to PeriodIndex with desired - frequency (inferred from index if not passed) + frequency (inferred from index if not passed). Parameters ---------- @@ -4210,8 +4232,9 @@ def to_period(self, freq=None, copy=True): def _sanitize_index(data, index, copy=False): - """ sanitize an index type to return an ndarray of the underlying, pass - thru a non-Index + """ + Sanitize an index type to return an ndarray of the underlying, pass + through a non-Index. """ if index is None: @@ -4238,8 +4261,9 @@ def _sanitize_index(data, index, copy=False): def _sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False): - """ sanitize input data to an ndarray, copy if specified, coerce to the - dtype if specified + """ + Sanitize input data to an ndarray, copy if specified, coerce to the + dtype if specified. """ if dtype is not None: diff --git a/pandas/core/window.py b/pandas/core/window.py index 4c67b04e89ba83..a079cea0fabd1f 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1,9 +1,6 @@ """ - -provide a generic structure to support window functions, -similar to how we have a Groupby object - - +Provide a generic structure to support window functions, +similar to how we have a Groupby object. """ from __future__ import division @@ -95,14 +92,17 @@ def validate(self): "'neither'") def _convert_freq(self): - """ resample according to the how, return a new object """ - + """ + Resample according to the how, return a new object. + """ obj = self._selected_obj index = None return obj, index def _create_blocks(self): - """ split data into blocks & return conformed data """ + """ + Split data into blocks & return conformed data. + """ obj, index = self._convert_freq() if index is not None: @@ -119,8 +119,7 @@ def _create_blocks(self): def _gotitem(self, key, ndim, subset=None): """ - sub-classes to define - return a sliced object + Sub-classes to define. Return a sliced object. 
        Parameters
        ----------
@@ -161,7 +160,9 @@ def _window_type(self):
         return self.__class__.__name__

     def __unicode__(self):
-        """ provide a nice str repr of our rolling object """
+        """
+        Provide a nice str repr of our rolling object.
+        """

         attrs = ["{k}={v}".format(k=k, v=getattr(self, k))
                  for k in self._attributes
@@ -175,7 +176,7 @@ def __iter__(self):

     def _get_index(self, index=None):
         """
-        Return index as ndarrays
+        Return index as ndarrays.

         Returns
         -------
@@ -219,7 +220,9 @@ def _prep_values(self, values=None, kill_inf=True):
         return values

     def _wrap_result(self, result, block=None, obj=None):
-        """ wrap a single result """
+        """
+        Wrap a single result.
+        """

         if obj is None:
             obj = self._selected_obj
@@ -243,7 +246,7 @@ def _wrap_result(self, result, block=None, obj=None):

     def _wrap_results(self, results, blocks, obj):
         """
-        wrap the results
+        Wrap the results.

         Parameters
         ----------
@@ -288,7 +291,9 @@ def _wrap_results(self, results, blocks, obj):
         return concat(final, axis=1).reindex(columns=columns, copy=False)

     def _center_window(self, result, window):
-        """ center the result in the window """
+        """
+        Center the result in the window.
+        """
         if self.axis > result.ndim - 1:
             raise ValueError("Requested axis is larger then no. of argument "
                              "dimensions")
@@ -606,8 +611,8 @@ def validate(self):

     def _prep_window(self, **kwargs):
         """
-        provide validation for our window type, return the window
-        we have already been validated
+        Provide validation for our window type; return the window
+        we have already validated.
         """

         window = self._get_window()
@@ -757,7 +762,9 @@ def mean(self, *args, **kwargs):

 class _GroupByMixin(GroupByMixin):
-    """ provide the groupby facilities """
+    """
+    Provide the groupby facilities.
+    """

     def __init__(self, obj, *args, **kwargs):
         parent = kwargs.pop('parent', None)  # noqa
@@ -776,8 +783,8 @@ def __init__(self, obj, *args, **kwargs):
     def _apply(self, func, name, window=None, center=None,
                check_minp=None, **kwargs):
         """
-        dispatch to apply; we are stripping all of the _apply kwargs and
-        performing the original function call on the grouped object
+        Dispatch to apply; we are stripping all of the _apply kwargs and
+        performing the original function call on the grouped object.
         """

         def f(x, name=name, *args):
@@ -800,8 +807,9 @@ def _constructor(self):
     def _apply(self, func, name=None, window=None, center=None,
                check_minp=None, **kwargs):
         """
-        Rolling statistical measure using supplied function. Designed to be
-        used with passed-in Cython array-based functions.
+        Rolling statistical measure using supplied function.
+
+        Designed to be used with passed-in Cython array-based functions.

         Parameters
         ----------
@@ -937,7 +945,7 @@ def count(self):
         return self._wrap_results(results, blocks, obj)

     _shared_docs['apply'] = dedent(r"""
-    %(name)s function apply
+    %(name)s function apply.

     Parameters
     ----------
@@ -995,7 +1003,7 @@ def sum(self, *args, **kwargs):
         return self._apply('roll_sum', 'sum', **kwargs)

     _shared_docs['max'] = dedent("""
-    %(name)s maximum
+    Calculate the %(name)s maximum.
     """)

     def max(self, *args, **kwargs):
@@ -1259,7 +1267,7 @@ def kurt(self, **kwargs):
                            check_minp=_require_min_periods(4), **kwargs)

     _shared_docs['quantile'] = dedent("""
-    %(name)s quantile.
+    Calculate the %(name)s quantile.

     Parameters
     ----------
@@ -1333,7 +1341,7 @@ def f(arg, *args, **kwargs):
                            **kwargs)

     _shared_docs['cov'] = dedent("""
-    %(name)s sample covariance
+    Calculate the %(name)s sample covariance.
Parameters ---------- @@ -1485,7 +1493,7 @@ def _get_cov(X, Y): Y 0.626300 1.000000 4 X 1.000000 0.555368 Y 0.555368 1.000000 -""") + """) def corr(self, other=None, pairwise=None, **kwargs): if other is None: @@ -1566,14 +1574,18 @@ def validate(self): "and offset based windows") def _validate_monotonic(self): - """ validate on is monotonic """ + """ + Validate on is_monotonic. + """ if not self._on.is_monotonic: formatted = self.on or 'index' raise ValueError("{0} must be " "monotonic".format(formatted)) def _validate_freq(self): - """ validate & return window frequency """ + """ + Validate & return window frequency. + """ from pandas.tseries.frequencies import to_offset try: return to_offset(self.window) @@ -1760,7 +1772,7 @@ def corr(self, other=None, pairwise=None, **kwargs): class RollingGroupby(_GroupByMixin, Rolling): """ - Provides a rolling groupby implementation + Provides a rolling groupby implementation. .. versionadded:: 0.18.1 @@ -1781,10 +1793,10 @@ def _gotitem(self, key, ndim, subset=None): def _validate_monotonic(self): """ - validate that on is monotonic; + Validate that on is monotonic; we don't care for groupby.rolling because we have already validated at a higher - level + level. """ pass @@ -2031,7 +2043,7 @@ def corr(self, other=None, pairwise=None, **kwargs): class ExpandingGroupby(_GroupByMixin, Expanding): """ - Provides a expanding groupby implementation + Provides a expanding groupby implementation. .. versionadded:: 0.18.1 @@ -2069,7 +2081,7 @@ def _constructor(self): class EWM(_Rolling): r""" - Provides exponential weighted functions + Provides exponential weighted functions. .. versionadded:: 0.18.0 @@ -2219,7 +2231,8 @@ def aggregate(self, arg, *args, **kwargs): agg = aggregate def _apply(self, func, **kwargs): - """Rolling statistical measure using supplied function. Designed to be + """ + Rolling statistical measure using supplied function. Designed to be used with passed-in Cython array-based functions. Parameters @@ -2261,7 +2274,9 @@ def func(arg): @Substitution(name='ewm') @Appender(_doc_template) def mean(self, *args, **kwargs): - """exponential weighted moving average""" + """ + Exponential weighted moving average. + """ nv.validate_window_func('mean', args, kwargs) return self._apply('ewma', **kwargs) @@ -2269,7 +2284,9 @@ def mean(self, *args, **kwargs): @Appender(_doc_template) @Appender(_bias_template) def std(self, bias=False, *args, **kwargs): - """exponential weighted moving stddev""" + """ + Exponential weighted moving stddev. + """ nv.validate_window_func('std', args, kwargs) return _zsqrt(self.var(bias=bias, **kwargs)) @@ -2279,7 +2296,9 @@ def std(self, bias=False, *args, **kwargs): @Appender(_doc_template) @Appender(_bias_template) def var(self, bias=False, *args, **kwargs): - """exponential weighted moving variance""" + """ + Exponential weighted moving variance. + """ nv.validate_window_func('var', args, kwargs) def f(arg): @@ -2293,7 +2312,9 @@ def f(arg): @Appender(_doc_template) @Appender(_pairwise_template) def cov(self, other=None, pairwise=None, bias=False, **kwargs): - """exponential weighted sample covariance""" + """ + Exponential weighted sample covariance. + """ if other is None: other = self._selected_obj # only default unset @@ -2316,7 +2337,9 @@ def _get_cov(X, Y): @Appender(_doc_template) @Appender(_pairwise_template) def corr(self, other=None, pairwise=None, **kwargs): - """exponential weighted sample correlation""" + """ + Exponential weighted sample correlation. 
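A usage sketch, hypothetical and with the numeric output elided since it depends on the smoothing parameters:

+
+        Examples
+        --------
+        >>> s1 = pd.Series([1, 2, 3, 4])
+        >>> s2 = pd.Series([2, 1, 4, 3])
+        >>> s1.ewm(span=3).corr(s2)  # doctest: +SKIP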
+        """
         if other is None:
             other = self._selected_obj
             # only default unset
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
index 2c018598a6a6e1..dda50b6a0e7f8e 100644
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ -53,8 +53,7 @@ def _mpl(func):

 class Styler(object):
     """
-    Helps style a DataFrame or Series according to the
-    data with HTML and CSS.
+    Helps style a DataFrame or Series according to the data with HTML and CSS.

     Parameters
     ----------
@@ -157,7 +156,9 @@ def default_display_func(x):
         self._display_funcs = defaultdict(lambda: default_display_func)

     def _repr_html_(self):
-        """Hooks into Jupyter notebook rich display system."""
+        """
+        Hooks into Jupyter notebook rich display system.
+        """
         return self.render()

     @Appender(_shared_docs['to_excel'] % dict(
@@ -187,7 +188,7 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
     def _translate(self):
         """
         Convert the DataFrame in `self.data` and the attrs from `_build_styles`
-        into a dictionary of {head, body, uuid, cellstyle}
+        into a dictionary of {head, body, uuid, cellstyle}.
         """
         table_styles = self.table_styles or []
         caption = self.caption
@@ -417,7 +418,8 @@ def format(self, formatter, subset=None):
         return self

     def render(self, **kwargs):
-        """Render the built up styles to HTML
+        """
+        Render the built up styles to HTML.

         Parameters
         ----------
@@ -467,8 +469,9 @@ def render(self, **kwargs):

     def _update_ctx(self, attrs):
         """
-        update the state of the Styler. Collects a mapping
-        of {index_label: ['<property>: <value>']}
+        Update the state of the Styler.
+
+        Collects a mapping of {index_label: ['<property>: <value>']}.

         attrs : Series or DataFrame
             should contain strings of '<property>: <value>;<prop2>: <val2>'
@@ -504,7 +507,8 @@ def __deepcopy__(self, memo):
         return self._copy(deepcopy=True)

     def clear(self):
-        """"Reset" the styler, removing any previously applied styles.
+        """
+        Reset the styler, removing any previously applied styles.

         Returns None.
         """
         self.ctx.clear()
@@ -696,9 +700,10 @@ def set_precision(self, precision):

     def set_table_attributes(self, attributes):
         """
-        Set the table attributes. These are the items
-        that show up in the opening ``<table>`` tag in addition
-        to to automatic (by default) id.
+        Set the table attributes.
+
+        These are the items that show up in the opening ``<table>`` tag
+        in addition to the automatic (by default) id.

         Parameters
         ----------
@@ -720,6 +725,7 @@ def set_table_attributes(self, attributes):
     def export(self):
         """
         Export the styles to applied to the current Styler.
+
         Can be applied to a second style with ``Styler.use``.

         Returns
@@ -785,8 +791,9 @@ def set_caption(self, caption):

     def set_table_styles(self, table_styles):
         """
-        Set the table styles on a Styler. These are placed in a
-        ``