Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into STY-repr-batch-5
Browse files Browse the repository at this point in the history
  • Loading branch information
MomIsBestFriend committed Dec 4, 2019
2 parents 895906b + 6b189d7 commit aa5ee8d
Show file tree
Hide file tree
Showing 17 changed files with 141 additions and 264 deletions.
3 changes: 2 additions & 1 deletion ci/deps/travis-36-cov.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ dependencies:
- numexpr
- numpy=1.15.*
- odfpy
- openpyxl
- openpyxl<=3.0.1
# https://github.com/pandas-dev/pandas/pull/30009 openpyxl 3.0.2 broke
- pandas-gbq
# https://github.com/pydata/pandas-gbq/issues/271
- google-cloud-bigquery<=1.11
Expand Down
1 change: 0 additions & 1 deletion doc/redirects.csv
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,6 @@ generated/pandas.Index.equals,../reference/api/pandas.Index.equals
generated/pandas.Index.factorize,../reference/api/pandas.Index.factorize
generated/pandas.Index.fillna,../reference/api/pandas.Index.fillna
generated/pandas.Index.format,../reference/api/pandas.Index.format
generated/pandas.Index.get_duplicates,../reference/api/pandas.Index.get_duplicates
generated/pandas.Index.get_indexer_for,../reference/api/pandas.Index.get_indexer_for
generated/pandas.Index.get_indexer,../reference/api/pandas.Index.get_indexer
generated/pandas.Index.get_indexer_non_unique,../reference/api/pandas.Index.get_indexer_non_unique
Expand Down
1 change: 0 additions & 1 deletion doc/source/reference/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,6 @@ Selecting
Index.asof
Index.asof_locs
Index.contains
Index.get_duplicates
Index.get_indexer
Index.get_indexer_for
Index.get_indexer_non_unique
Expand Down
6 changes: 5 additions & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,9 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`)
- Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to ``False`` (:issue:`27600`)
- Removed the previously deprecated :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`)
- Removed the previously deprecated ``time_rule`` keyword from (non-public) :func:`offsets.generate_range`, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`)
- :meth:`DataFrame.loc` or :meth:`Series.loc` with listlike indexers and missing labels will no longer reindex (:issue:`17295`)
- :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` with non-existent columns will no longer reindex (:issue:`17295`)
- Removed the previously deprecated "by" keyword from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`)
- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`)
- Passing ``datetime64`` data to :class:`TimedeltaIndex` or ``timedelta64`` data to ``DatetimeIndex`` now raises ``TypeError`` (:issue:`23539`, :issue:`23937`)
Expand Down Expand Up @@ -548,7 +551,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Removed the previously deprecated properties :attr:`DataFrame.is_copy`, :attr:`Series.is_copy` (:issue:`18812`)
- Removed the previously deprecated :meth:`DataFrame.get_ftype_counts`, :meth:`Series.get_ftype_counts` (:issue:`18243`)
- Removed the previously deprecated :meth:`DataFrame.ftypes`, :meth:`Series.ftypes`, :meth:`Series.ftype` (:issue:`26744`)
- Removed the previously deprecated :meth:`Index.get_duplicated`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
- Removed the previously deprecated :meth:`Index.get_duplicates`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
- Removed the previously deprecated :meth:`Series.clip_upper`, :meth:`Series.clip_lower`, :meth:`DataFrame.clip_upper`, :meth:`DataFrame.clip_lower` (:issue:`24203`)
- Removed the ability to alter :attr:`DatetimeIndex.freq`, :attr:`TimedeltaIndex.freq`, or :attr:`PeriodIndex.freq` (:issue:`20772`)
- Removed the previously deprecated :attr:`DatetimeIndex.offset` (:issue:`20730`)
Expand Down Expand Up @@ -658,6 +661,7 @@ Numeric
- Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`)
- Bug in :class:`NumericIndex` construction that caused indexing to fail when integers in the ``np.uint64`` range were used (:issue:`28023`)
- Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be casted to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`)
- Bug in :meth:`Series.interpolate` when using ``method='index'`` with an unsorted index, which previously returned incorrect results (:issue:`21037`)

Conversion
^^^^^^^^^^
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numbers
from typing import Union

import numpy as np
from numpy.lib.mixins import NDArrayOperatorsMixin
Expand Down Expand Up @@ -117,11 +118,12 @@ class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin):
# pandas internals, which turns off things like block consolidation.
_typ = "npy_extension"
__array_priority__ = 1000
_ndarray: np.ndarray

# ------------------------------------------------------------------------
# Constructors

def __init__(self, values, copy=False):
def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False):
if isinstance(values, type(self)):
values = values._ndarray
if not isinstance(values, np.ndarray):
Expand Down
62 changes: 0 additions & 62 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2138,68 +2138,6 @@ def duplicated(self, keep="first"):
"""
return super().duplicated(keep=keep)

def get_duplicates(self):
    """
    Extract duplicated index elements.

    .. deprecated:: 0.23.0
       Use ``idx[idx.duplicated()].unique()`` instead.

    Returns a sorted list of index elements which appear more than once in
    the index.

    Returns
    -------
    array-like
        List of duplicated indexes.

    See Also
    --------
    Index.duplicated : Return boolean array denoting duplicates.
    Index.drop_duplicates : Return Index with duplicates removed.

    Examples
    --------
    Works on different Index of types.

    >>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates() # doctest: +SKIP
    [2, 3]

    Note that for a DatetimeIndex, it does not return a list but a new
    DatetimeIndex:

    >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03',
    ...                         '2018-01-03', '2018-01-04', '2018-01-04'],
    ...                        format='%Y-%m-%d')
    >>> pd.Index(dates).get_duplicates() # doctest: +SKIP
    DatetimeIndex(['2018-01-03', '2018-01-04'],
                  dtype='datetime64[ns]', freq=None)

    Sorts duplicated elements even when indexes are unordered.

    >>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates() # doctest: +SKIP
    [2, 3]

    Return empty array-like structure when all elements are unique.

    >>> pd.Index([1, 2, 3, 4]).get_duplicates() # doctest: +SKIP
    []

    >>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03'],
    ...                        format='%Y-%m-%d')
    >>> pd.Index(dates).get_duplicates() # doctest: +SKIP
    DatetimeIndex([], dtype='datetime64[ns]', freq=None)
    """
    # Deprecated since 0.23.0 — steer callers toward the supported idiom.
    warnings.warn(
        "'get_duplicates' is deprecated and will be removed in "
        "a future release. You can use "
        "idx[idx.duplicated()].unique() instead",
        FutureWarning,
        stacklevel=2,
    )

    # Mask of repeated labels -> select them -> collapse to unique values.
    return self[self.duplicated()].unique()

def _get_unique_index(self, dropna=False):
"""
Returns an index containing unique values.
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,11 @@ def interpolate_1d(
inds = lib.maybe_convert_objects(inds)
else:
inds = xvalues
result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
# np.interp requires sorted X values, #21037
indexer = np.argsort(inds[valid])
result[invalid] = np.interp(
inds[invalid], inds[valid][indexer], yvalues[valid][indexer]
)
result[preserve_nans] = np.nan
return result

Expand Down
67 changes: 34 additions & 33 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,9 +361,6 @@ def read_hdf(
>>> df.to_hdf('./store.h5', 'data')
>>> reread = pd.read_hdf('./store.h5')
"""
assert not kwargs, kwargs
# NB: in principle more kwargs could be passed to HDFStore, but in
# tests none are.

if mode not in ["r", "r+", "a"]:
raise ValueError(
Expand Down Expand Up @@ -500,13 +497,14 @@ class HDFStore:
"""

_handle: Optional["File"]
_mode: str
_complevel: int
_fletcher32: bool

def __init__(
self,
path,
mode=None,
mode: str = "a",
complevel: Optional[int] = None,
complib=None,
fletcher32: bool = False,
Expand Down Expand Up @@ -837,16 +835,24 @@ def select_as_coordinates(
raise TypeError("can only read_coordinates with a table")
return tbl.read_coordinates(where=where, start=start, stop=stop)

def select_column(self, key: str, column: str, **kwargs):
def select_column(
self,
key: str,
column: str,
start: Optional[int] = None,
stop: Optional[int] = None,
):
"""
return a single column from the table. This is generally only useful to
select an indexable
Parameters
----------
key : str
column: str
column : str
The column of interest.
start : int or None, default None
stop : int or None, default None
Raises
------
Expand All @@ -859,7 +865,7 @@ def select_column(self, key: str, column: str, **kwargs):
tbl = self.get_storer(key)
if not isinstance(tbl, Table):
raise TypeError("can only read_column with a table")
return tbl.read_column(column=column, **kwargs)
return tbl.read_column(column=column, start=start, stop=stop)

def select_as_multiple(
self,
Expand Down Expand Up @@ -2582,9 +2588,9 @@ class Fixed:
Parameters
----------
parent : my parent HDFStore
group : the group node where the table resides
parent : HDFStore
group : Node
The group node where the table resides.
"""

pandas_kind: str
Expand Down Expand Up @@ -2871,7 +2877,7 @@ def read_index(
return self.read_multi_index(key, start=start, stop=stop)
elif variety == "regular":
node = getattr(self.group, key)
_, index = self.read_index_node(node, start=start, stop=stop)
index = self.read_index_node(node, start=start, stop=stop)
return index
else: # pragma: no cover
raise TypeError(f"unrecognized index variety: {variety}")
Expand Down Expand Up @@ -2931,13 +2937,13 @@ def read_multi_index(

levels = []
codes = []
names = []
names: List[Optional[Hashable]] = []
for i in range(nlevels):
level_key = f"{key}_level{i}"
node = getattr(self.group, level_key)
name, lev = self.read_index_node(node, start=start, stop=stop)
lev = self.read_index_node(node, start=start, stop=stop)
levels.append(lev)
names.append(name)
names.append(lev.name)

label_key = f"{key}_label{i}"
level_codes = self.read_array(label_key, start=start, stop=stop)
Expand All @@ -2949,7 +2955,7 @@ def read_multi_index(

def read_index_node(
self, node: "Node", start: Optional[int] = None, stop: Optional[int] = None
):
) -> Index:
data = node[start:stop]
# If the index was an empty array write_array_empty() will
# have written a sentinel. Here we replace it with the original.
Expand Down Expand Up @@ -2997,7 +3003,7 @@ def read_index_node(

index.name = name

return name, index
return index

def write_array_empty(self, key: str, value):
""" write a 0-len array """
Expand Down Expand Up @@ -3131,7 +3137,6 @@ def write(self, obj, **kwargs):

class BlockManagerFixed(GenericFixed):
attributes = ["ndim", "nblocks"]
is_shape_reversed = False

nblocks: int

Expand All @@ -3158,10 +3163,6 @@ def shape(self):

shape.append(items)

# hacky - this works for frames, but is reversed for panels
if self.is_shape_reversed:
shape = shape[::-1]

return shape
except AttributeError:
return None
Expand Down Expand Up @@ -3259,7 +3260,6 @@ class Table(Fixed):
table_type: str
levels = 1
is_table = True
is_shape_reversed = False

index_axes: List[IndexCol]
non_index_axes: List[Tuple[int, Any]]
Expand Down Expand Up @@ -3302,7 +3302,7 @@ def __repr__(self) -> str:
f"ncols->{self.ncols},indexers->[{jindex_axes}]{dc})"
)

def __getitem__(self, c):
def __getitem__(self, c: str):
""" return the axis for c """
for a in self.axes:
if c == a.name:
Expand Down Expand Up @@ -3345,10 +3345,6 @@ def is_multi_index(self) -> bool:
"""the levels attribute is 1 or a list in the case of a multi-index"""
return isinstance(self.levels, list)

def validate_metadata(self, existing):
    """ create / validate metadata """
    # NOTE(review): ``existing`` is accepted but never used in this body —
    # confirm whether callers rely on it before removing.
    # Record the names of the value axes that carry metadata.
    self.metadata = [c.name for c in self.values_axes if c.metadata is not None]

def validate_multiindex(self, obj):
"""validate that we can store the multi-index; reset and return the
new object
Expand Down Expand Up @@ -3651,8 +3647,8 @@ def read_axes(
Parameters
----------
where : ???
start: int or None, default None
stop: int or None, default None
start : int or None, default None
stop : int or None, default None
Returns
-------
Expand Down Expand Up @@ -3946,7 +3942,7 @@ def get_blk_items(mgr, blocks):
self.validate_min_itemsize(min_itemsize)

# validate our metadata
self.validate_metadata(existing_table)
self.metadata = [c.name for c in self.values_axes if c.metadata is not None]

# validate the axes if we have an existing table
if validate:
Expand Down Expand Up @@ -4122,7 +4118,13 @@ class WORMTable(Table):

table_type = "worm"

def read(
    self,
    where=None,
    columns=None,
    start: Optional[int] = None,
    stop: Optional[int] = None,
):
    """
    Read the indices and the indexing array, calculate offset rows and
    return.

    Parameters
    ----------
    where : optional selection criteria, default None
    columns : optional column subset, default None
    start : int or None, default None
    stop : int or None, default None

    Raises
    ------
    NotImplementedError
        WORMTable does not implement reading yet.
    """
    # Write-once-read-many table reading is not implemented.
    raise NotImplementedError("WORMTable needs to implement read")
Expand Down Expand Up @@ -4479,8 +4481,7 @@ def write(self, obj, data_columns=None, **kwargs):
""" we are going to write this as a frame table """
if not isinstance(obj, DataFrame):
name = obj.name or "values"
obj = DataFrame({name: obj}, index=obj.index)
obj.columns = [name]
obj = obj.to_frame(name)
return super().write(obj=obj, data_columns=obj.columns.tolist(), **kwargs)

def read(
Expand Down
Loading

0 comments on commit aa5ee8d

Please sign in to comment.