Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into STY-repr-batch-5
Browse files Browse the repository at this point in the history
  • Loading branch information
MomIsBestFriend committed Dec 4, 2019
2 parents 895906b + 6b189d7 commit aa5ee8d
Show file tree
Hide file tree
Showing 17 changed files with 141 additions and 264 deletions.
3 changes: 2 additions & 1 deletion ci/deps/travis-36-cov.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ dependencies:
- numexpr
- numpy=1.15.*
- odfpy
- openpyxl
- openpyxl<=3.0.1
# https://github.com/pandas-dev/pandas/pull/30009 openpyxl 3.0.2 broke
- pandas-gbq
# https://github.com/pydata/pandas-gbq/issues/271
- google-cloud-bigquery<=1.11
Expand Down
1 change: 0 additions & 1 deletion doc/redirects.csv
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,6 @@ generated/pandas.Index.equals,../reference/api/pandas.Index.equals
generated/pandas.Index.factorize,../reference/api/pandas.Index.factorize
generated/pandas.Index.fillna,../reference/api/pandas.Index.fillna
generated/pandas.Index.format,../reference/api/pandas.Index.format
generated/pandas.Index.get_duplicates,../reference/api/pandas.Index.get_duplicates
generated/pandas.Index.get_indexer_for,../reference/api/pandas.Index.get_indexer_for
generated/pandas.Index.get_indexer,../reference/api/pandas.Index.get_indexer
generated/pandas.Index.get_indexer_non_unique,../reference/api/pandas.Index.get_indexer_non_unique
Expand Down
1 change: 0 additions & 1 deletion doc/source/reference/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,6 @@ Selecting
Index.asof
Index.asof_locs
Index.contains
Index.get_duplicates
Index.get_indexer
Index.get_indexer_for
Index.get_indexer_non_unique
Expand Down
6 changes: 5 additions & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,9 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`)
- Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to ``False`` (:issue:`27600`)
- Removed the previously deprecated :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`)
- Removed the previously deprecated ``time_rule`` keyword from (non-public) :func:`offsets.generate_range`, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`)
- :meth:`DataFrame.loc` or :meth:`Series.loc` with listlike indexers and missing labels will no longer reindex (:issue:`17295`)
- :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` with non-existent columns will no longer reindex (:issue:`17295`)
- Removed the previously deprecated "by" keyword from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`)
- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`)
- Passing ``datetime64`` data to :class:`TimedeltaIndex` or ``timedelta64`` data to ``DatetimeIndex`` now raises ``TypeError`` (:issue:`23539`, :issue:`23937`)
Expand Down Expand Up @@ -548,7 +551,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Removed the previously deprecated properties :attr:`DataFrame.is_copy`, :attr:`Series.is_copy` (:issue:`18812`)
- Removed the previously deprecated :meth:`DataFrame.get_ftype_counts`, :meth:`Series.get_ftype_counts` (:issue:`18243`)
- Removed the previously deprecated :meth:`DataFrame.ftypes`, :meth:`Series.ftypes`, :meth:`Series.ftype` (:issue:`26744`)
- Removed the previously deprecated :meth:`Index.get_duplicated`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
- Removed the previously deprecated :meth:`Index.get_duplicates`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
- Removed the previously deprecated :meth:`Series.clip_upper`, :meth:`Series.clip_lower`, :meth:`DataFrame.clip_upper`, :meth:`DataFrame.clip_lower` (:issue:`24203`)
- Removed the ability to alter :attr:`DatetimeIndex.freq`, :attr:`TimedeltaIndex.freq`, or :attr:`PeriodIndex.freq` (:issue:`20772`)
- Removed the previously deprecated :attr:`DatetimeIndex.offset` (:issue:`20730`)
Expand Down Expand Up @@ -658,6 +661,7 @@ Numeric
- Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`)
- Bug in :class:`NumericIndex` construction that caused indexing to fail when integers in the ``np.uint64`` range were used (:issue:`28023`)
- Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be casted to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`)
- Bug in :meth:`Series.interpolate` when using ``method='index'`` with an unsorted index, which previously returned incorrect results (:issue:`21037`)

Conversion
^^^^^^^^^^
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numbers
from typing import Union

import numpy as np
from numpy.lib.mixins import NDArrayOperatorsMixin
Expand Down Expand Up @@ -117,11 +118,12 @@ class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin):
# pandas internals, which turns off things like block consolidation.
_typ = "npy_extension"
__array_priority__ = 1000
_ndarray: np.ndarray

# ------------------------------------------------------------------------
# Constructors

def __init__(self, values, copy=False):
def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False):
if isinstance(values, type(self)):
values = values._ndarray
if not isinstance(values, np.ndarray):
Expand Down
62 changes: 0 additions & 62 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2138,68 +2138,6 @@ def duplicated(self, keep="first"):
"""
return super().duplicated(keep=keep)

def get_duplicates(self):
    """
    Extract duplicated index elements.

    .. deprecated:: 0.23.0
       Use ``idx[idx.duplicated()].unique()`` instead.

    Returns a sorted list of index elements which appear more than once in
    the index.

    Returns
    -------
    array-like
        List of duplicated indexes.

    See Also
    --------
    Index.duplicated : Return boolean array denoting duplicates.
    Index.drop_duplicates : Return Index with duplicates removed.

    Examples
    --------
    Works on different Index of types.

    >>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates() # doctest: +SKIP
    [2, 3]

    Note that for a DatetimeIndex, it does not return a list but a new
    DatetimeIndex:

    >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03',
    ...                         '2018-01-03', '2018-01-04', '2018-01-04'],
    ...                        format='%Y-%m-%d')
    >>> pd.Index(dates).get_duplicates() # doctest: +SKIP
    DatetimeIndex(['2018-01-03', '2018-01-04'],
                  dtype='datetime64[ns]', freq=None)

    Sorts duplicated elements even when indexes are unordered.

    >>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates() # doctest: +SKIP
    [2, 3]

    Return empty array-like structure when all elements are unique.

    >>> pd.Index([1, 2, 3, 4]).get_duplicates() # doctest: +SKIP
    []

    >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03'],
    ...                        format='%Y-%m-%d')
    >>> pd.Index(dates).get_duplicates() # doctest: +SKIP
    DatetimeIndex([], dtype='datetime64[ns]', freq=None)
    """
    # Deprecated since 0.23.0 — steer callers toward the supported idiom.
    warnings.warn(
        "'get_duplicates' is deprecated and will be removed in "
        "a future release. You can use "
        "idx[idx.duplicated()].unique() instead",
        FutureWarning,
        stacklevel=2,
    )

    # Mask of repeated labels -> select them -> collapse to unique values.
    return self[self.duplicated()].unique()

def _get_unique_index(self, dropna=False):
"""
Returns an index containing unique values.
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,11 @@ def interpolate_1d(
inds = lib.maybe_convert_objects(inds)
else:
inds = xvalues
result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
# np.interp requires sorted X values, #21037
indexer = np.argsort(inds[valid])
result[invalid] = np.interp(
inds[invalid], inds[valid][indexer], yvalues[valid][indexer]
)
result[preserve_nans] = np.nan
return result

Expand Down
67 changes: 34 additions & 33 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,9 +361,6 @@ def read_hdf(
>>> df.to_hdf('./store.h5', 'data')
>>> reread = pd.read_hdf('./store.h5')
"""
assert not kwargs, kwargs
# NB: in principle more kwargs could be passed to HDFStore, but in
# tests none are.

if mode not in ["r", "r+", "a"]:
raise ValueError(
Expand Down Expand Up @@ -500,13 +497,14 @@ class HDFStore:
"""

_handle: Optional["File"]
_mode: str
_complevel: int
_fletcher32: bool

def __init__(
self,
path,
mode=None,
mode: str = "a",
complevel: Optional[int] = None,
complib=None,
fletcher32: bool = False,
Expand Down Expand Up @@ -837,16 +835,24 @@ def select_as_coordinates(
raise TypeError("can only read_coordinates with a table")
return tbl.read_coordinates(where=where, start=start, stop=stop)

def select_column(self, key: str, column: str, **kwargs):
def select_column(
self,
key: str,
column: str,
start: Optional[int] = None,
stop: Optional[int] = None,
):
"""
return a single column from the table. This is generally only useful to
select an indexable
Parameters
----------
key : str
column: str
column : str
The column of interest.
start : int or None, default None
stop : int or None, default None
Raises
------
Expand All @@ -859,7 +865,7 @@ def select_column(self, key: str, column: str, **kwargs):
tbl = self.get_storer(key)
if not isinstance(tbl, Table):
raise TypeError("can only read_column with a table")
return tbl.read_column(column=column, **kwargs)
return tbl.read_column(column=column, start=start, stop=stop)

def select_as_multiple(
self,
Expand Down Expand Up @@ -2582,9 +2588,9 @@ class Fixed:
Parameters
----------
parent : my parent HDFStore
group : the group node where the table resides
parent : HDFStore
group : Node
The group node where the table resides.
"""

pandas_kind: str
Expand Down Expand Up @@ -2871,7 +2877,7 @@ def read_index(
return self.read_multi_index(key, start=start, stop=stop)
elif variety == "regular":
node = getattr(self.group, key)
_, index = self.read_index_node(node, start=start, stop=stop)
index = self.read_index_node(node, start=start, stop=stop)
return index
else: # pragma: no cover
raise TypeError(f"unrecognized index variety: {variety}")
Expand Down Expand Up @@ -2931,13 +2937,13 @@ def read_multi_index(

levels = []
codes = []
names = []
names: List[Optional[Hashable]] = []
for i in range(nlevels):
level_key = f"{key}_level{i}"
node = getattr(self.group, level_key)
name, lev = self.read_index_node(node, start=start, stop=stop)
lev = self.read_index_node(node, start=start, stop=stop)
levels.append(lev)
names.append(name)
names.append(lev.name)

label_key = f"{key}_label{i}"
level_codes = self.read_array(label_key, start=start, stop=stop)
Expand All @@ -2949,7 +2955,7 @@ def read_multi_index(

def read_index_node(
self, node: "Node", start: Optional[int] = None, stop: Optional[int] = None
):
) -> Index:
data = node[start:stop]
# If the index was an empty array write_array_empty() will
# have written a sentinel. Here we replace it with the original.
Expand Down Expand Up @@ -2997,7 +3003,7 @@ def read_index_node(

index.name = name

return name, index
return index

def write_array_empty(self, key: str, value):
""" write a 0-len array """
Expand Down Expand Up @@ -3131,7 +3137,6 @@ def write(self, obj, **kwargs):

class BlockManagerFixed(GenericFixed):
attributes = ["ndim", "nblocks"]
is_shape_reversed = False

nblocks: int

Expand All @@ -3158,10 +3163,6 @@ def shape(self):

shape.append(items)

# hacky - this works for frames, but is reversed for panels
if self.is_shape_reversed:
shape = shape[::-1]

return shape
except AttributeError:
return None
Expand Down Expand Up @@ -3259,7 +3260,6 @@ class Table(Fixed):
table_type: str
levels = 1
is_table = True
is_shape_reversed = False

index_axes: List[IndexCol]
non_index_axes: List[Tuple[int, Any]]
Expand Down Expand Up @@ -3302,7 +3302,7 @@ def __repr__(self) -> str:
f"ncols->{self.ncols},indexers->[{jindex_axes}]{dc})"
)

def __getitem__(self, c):
def __getitem__(self, c: str):
""" return the axis for c """
for a in self.axes:
if c == a.name:
Expand Down Expand Up @@ -3345,10 +3345,6 @@ def is_multi_index(self) -> bool:
"""the levels attribute is 1 or a list in the case of a multi-index"""
return isinstance(self.levels, list)

def validate_metadata(self, existing):
    """ create / validate metadata """
    # NOTE(review): ``existing`` is accepted but never used in this body —
    # confirm whether callers rely on it before removing.
    # Record the names of the value axes that carry metadata.
    self.metadata = [c.name for c in self.values_axes if c.metadata is not None]

def validate_multiindex(self, obj):
"""validate that we can store the multi-index; reset and return the
new object
Expand Down Expand Up @@ -3651,8 +3647,8 @@ def read_axes(
Parameters
----------
where : ???
start: int or None, default None
stop: int or None, default None
start : int or None, default None
stop : int or None, default None
Returns
-------
Expand Down Expand Up @@ -3946,7 +3942,7 @@ def get_blk_items(mgr, blocks):
self.validate_min_itemsize(min_itemsize)

# validate our metadata
self.validate_metadata(existing_table)
self.metadata = [c.name for c in self.values_axes if c.metadata is not None]

# validate the axes if we have an existing table
if validate:
Expand Down Expand Up @@ -4122,7 +4118,13 @@ class WORMTable(Table):

table_type = "worm"

def read(
    self,
    where=None,
    columns=None,
    start: Optional[int] = None,
    stop: Optional[int] = None,
):
    """
    Read the indices and the indexing array, calculate offset rows and
    return.

    Parameters
    ----------
    where : optional selection criteria, default None
    columns : optional column subset, default None
    start : int or None, default None
    stop : int or None, default None

    Raises
    ------
    NotImplementedError
        WORMTable does not implement reading yet.
    """
    # Write-once-read-many table reading is not implemented.
    raise NotImplementedError("WORMTable needs to implement read")
Expand Down Expand Up @@ -4479,8 +4481,7 @@ def write(self, obj, data_columns=None, **kwargs):
""" we are going to write this as a frame table """
if not isinstance(obj, DataFrame):
name = obj.name or "values"
obj = DataFrame({name: obj}, index=obj.index)
obj.columns = [name]
obj = obj.to_frame(name)
return super().write(obj=obj, data_columns=obj.columns.tolist(), **kwargs)

def read(
Expand Down
Loading

0 comments on commit aa5ee8d

Please sign in to comment.