From ede0dae3b74a5b9cba11ad6d86fcbc92329f631c Mon Sep 17 00:00:00 2001 From: Gjelt Date: Tue, 27 Nov 2018 00:12:13 +0100 Subject: [PATCH 01/78] DOC: update the DataFrame.reindex_like docstring (#22775) --- pandas/core/frame.py | 90 ++++++++++++-------- pandas/core/generic.py | 184 +++++++++++++++++++++++++++++------------ 2 files changed, 187 insertions(+), 87 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 032d68e2d3e8c1..592825a7ea63b7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3924,22 +3924,36 @@ def shift(self, periods=1, freq=None, axis=0): def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False): """ + Set the DataFrame index using existing columns. + Set the DataFrame index (row labels) using one or more existing - columns. By default yields a new object. + columns. The index can replace the existing index or expand on it. Parameters ---------- - keys : column label or list of column labels / arrays - drop : boolean, default True - Delete columns to be used as the new index - append : boolean, default False - Whether to append columns to existing index - inplace : boolean, default False - Modify the DataFrame in place (do not create a new object) - verify_integrity : boolean, default False + keys : label or list of label + Name or names of the columns that will be used as the index. + drop : bool, default True + Delete columns to be used as the new index. + append : bool, default False + Whether to append columns to existing index. + inplace : bool, default False + Modify the DataFrame in place (do not create a new object). + verify_integrity : bool, default False Check the new index for duplicates. Otherwise defer the check until necessary. Setting to False will improve the performance of this - method + method. + + Returns + ------- + DataFrame + Changed row labels. + + See Also + -------- + DataFrame.reset_index : Opposite of set_index. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. Returns ------- @@ -3949,22 +3963,23 @@ def set_index(self, keys, drop=True, append=False, inplace=False, -------- >>> df = pd.DataFrame({'month': [1, 4, 7, 10], ... 'year': [2012, 2014, 2013, 2014], - ... 'sale':[55, 40, 84, 31]}) - month sale year - 0 1 55 2012 - 1 4 40 2014 - 2 7 84 2013 - 3 10 31 2014 + ... 'sale': [55, 40, 84, 31]}) + >>> df + month year sale + 0 1 2012 55 + 1 4 2014 40 + 2 7 2013 84 + 3 10 2014 31 Set the index to become the 'month' column: >>> df.set_index('month') - sale year + year sale month - 1 55 2012 - 4 40 2014 - 7 84 2013 - 10 31 2014 + 1 2012 55 + 4 2014 40 + 7 2013 84 + 10 2014 31 Create a multi-index using columns 'year' and 'month': @@ -4072,22 +4087,22 @@ def set_index(self, keys, drop=True, append=False, inplace=False, def reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill=''): """ - For DataFrame with multi-level index, return new DataFrame with - labeling information in the columns under the index names, defaulting - to 'level_0', 'level_1', etc. if any are None. For a standard index, - the index name will be used (if set), otherwise a default 'index' or - 'level_0' (if 'index' is already taken) will be used. + Reset the index, or a level of it. + + Reset the index of the DataFrame, and use the default one instead. + If the DataFrame has a MultiIndex, this method can remove one or more + levels. 
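(Editor's sketch, not part of the patch: a minimal round trip between the
two methods documented here, assuming a default RangeIndex.)

>>> df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
>>> df.set_index('a').reset_index()
   a  b
0  1  3
1  2  4

``set_index('a')`` moves column 'a' into the index; ``reset_index`` moves it
back out and restores the default integer index.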
Parameters ---------- level : int, str, tuple, or list, default None Only remove the given levels from the index. Removes all levels by - default - drop : boolean, default False + default. + drop : bool, default False Do not try to insert index into dataframe columns. This resets the index to the default integer index. - inplace : boolean, default False - Modify the DataFrame in place (do not create a new object) + inplace : bool, default False + Modify the DataFrame in place (do not create a new object). col_level : int or str, default 0 If the columns have multiple levels, determines which level the labels are inserted into. By default it is inserted into the first @@ -4098,13 +4113,20 @@ def reset_index(self, level=None, drop=False, inplace=False, col_level=0, Returns ------- - resetted : DataFrame + DataFrame + DataFrame with the new index. + + See Also + -------- + DataFrame.set_index : Opposite of reset_index. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. Examples -------- - >>> df = pd.DataFrame([('bird', 389.0), - ... ('bird', 24.0), - ... ('mammal', 80.5), + >>> df = pd.DataFrame([('bird', 389.0), + ... ('bird', 24.0), + ... ('mammal', 80.5), ... ('mammal', np.nan)], ... index=['falcon', 'parrot', 'lion', 'monkey'], ... columns=('class', 'max_speed')) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3a7016ce396769..e8402e7eefd901 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3423,29 +3423,99 @@ def select(self, crit, axis=0): def reindex_like(self, other, method=None, copy=True, limit=None, tolerance=None): - """Return an object with matching indices to myself. + """ + Return an object with matching indices as other object. + + Conform the object to the same index on all axes. Optional + filling logic, placing NaN in locations having no value + in the previous index. A new object is produced unless the + new index is equivalent to the current one and copy=False. Parameters ---------- - other : Object - method : string or None - copy : boolean, default True + other : Object of the same data type + Its row and column indices are used to define the new indices + of this object. + method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} + Method to use for filling holes in reindexed DataFrame. + Please note: this is only applicable to DataFrames/Series with a + monotonically increasing/decreasing index. + + * None (default): don't fill gaps + * pad / ffill: propagate last valid observation forward to next + valid + * backfill / bfill: use next valid observation to fill gap + * nearest: use nearest valid observations to fill gap + + copy : bool, default True + Return a new object, even if the passed indexes are the same. limit : int, default None Maximum number of consecutive labels to fill for inexact matches. tolerance : optional - Maximum distance between labels of the other object and this - object for inexact matches. Can be list-like. + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations most + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. .. 
versionadded:: 0.21.0 (list-like tolerance) + Returns + ------- + Series or DataFrame + Same type as caller, but with changed indices on each axis. + + See Also + -------- + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex : Change to new indices or expand indices. + Notes ----- - Like calling s.reindex(index=other.index, columns=other.columns, - method=...) + Same as calling + ``.reindex(index=other.index, columns=other.columns,...)``. - Returns - ------- - reindexed : same as input + Examples + -------- + >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'], + ... [31, 87.8, 'high'], + ... [22, 71.6, 'medium'], + ... [35, 95, 'medium']], + ... columns=['temp_celsius', 'temp_fahrenheit', 'windspeed'], + ... index=pd.date_range(start='2014-02-12', + ... end='2014-02-15', freq='D')) + + >>> df1 + temp_celsius temp_fahrenheit windspeed + 2014-02-12 24.3 75.7 high + 2014-02-13 31.0 87.8 high + 2014-02-14 22.0 71.6 medium + 2014-02-15 35.0 95.0 medium + + >>> df2 = pd.DataFrame([[28, 'low'], + ... [30, 'low'], + ... [35.1, 'medium']], + ... columns=['temp_celsius', 'windspeed'], + ... index=pd.DatetimeIndex(['2014-02-12', '2014-02-13', + ... '2014-02-15'])) + + >>> df2 + temp_celsius windspeed + 2014-02-12 28.0 low + 2014-02-13 30.0 low + 2014-02-15 35.1 medium + + >>> df2.reindex_like(df1) + temp_celsius temp_fahrenheit windspeed + 2014-02-12 28.0 NaN low + 2014-02-13 30.0 NaN low + 2014-02-14 NaN NaN NaN + 2014-02-15 35.1 NaN medium """ d = other._construct_axes_dict(axes=self._AXIS_ORDERS, method=method, copy=copy, limit=limit, @@ -3812,36 +3882,36 @@ def reindex(self, *args, **kwargs): Conform %(klass)s to new index with optional filling logic, placing NA/NaN in locations having no value in the previous index. A new object is produced unless the new index is equivalent to the current one and - copy=False + ``copy=False``. Parameters ---------- %(optional_labels)s - %(axes)s : array-like, optional (should be specified using keywords) - New labels / index to conform to. Preferably an Index object to - avoid duplicating data + %(axes)s : array-like, optional + New labels / index to conform to, should be specified using + keywords. Preferably an Index object to avoid duplicating data %(optional_axis)s - method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional - method to use for filling holes in reindexed DataFrame. + method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} + Method to use for filling holes in reindexed DataFrame. Please note: this is only applicable to DataFrames/Series with a monotonically increasing/decreasing index. - * default: don't fill gaps + * None (default): don't fill gaps * pad / ffill: propagate last valid observation forward to next valid * backfill / bfill: use next valid observation to fill gap * nearest: use nearest valid observations to fill gap - copy : boolean, default True - Return a new object, even if the passed indexes are the same + copy : bool, default True + Return a new object, even if the passed indexes are the same. level : int or name Broadcast across a level, matching Index values on the - passed MultiIndex level + passed MultiIndex level. fill_value : scalar, default np.NaN Value to use for missing values. Defaults to NaN, but can be any - "compatible" value + "compatible" value. limit : int, default None - Maximum number of consecutive elements to forward or backward fill + Maximum number of consecutive elements to forward or backward fill. 
tolerance : optional Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations most @@ -3855,6 +3925,12 @@ def reindex(self, *args, **kwargs): .. versionadded:: 0.21.0 (list-like tolerance) + See Also + -------- + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex_like : Change to same indices as other DataFrame. + Examples -------- @@ -3946,12 +4022,12 @@ def reindex(self, *args, **kwargs): ... index=date_index) >>> df2 prices - 2010-01-01 100 - 2010-01-02 101 + 2010-01-01 100.0 + 2010-01-02 101.0 2010-01-03 NaN - 2010-01-04 100 - 2010-01-05 89 - 2010-01-06 88 + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 Suppose we decide to expand the dataframe to cover a wider date range. @@ -3962,12 +4038,12 @@ def reindex(self, *args, **kwargs): 2009-12-29 NaN 2009-12-30 NaN 2009-12-31 NaN - 2010-01-01 100 - 2010-01-02 101 + 2010-01-01 100.0 + 2010-01-02 101.0 2010-01-03 NaN - 2010-01-04 100 - 2010-01-05 89 - 2010-01-06 88 + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 2010-01-07 NaN The index entries that did not have a value in the original data frame @@ -3980,15 +4056,15 @@ def reindex(self, *args, **kwargs): >>> df2.reindex(date_index2, method='bfill') prices - 2009-12-29 100 - 2009-12-30 100 - 2009-12-31 100 - 2010-01-01 100 - 2010-01-02 101 + 2009-12-29 100.0 + 2009-12-30 100.0 + 2009-12-31 100.0 + 2010-01-01 100.0 + 2010-01-02 101.0 2010-01-03 NaN - 2010-01-04 100 - 2010-01-05 89 - 2010-01-06 88 + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 2010-01-07 NaN Please note that the ``NaN`` value present in the original dataframe @@ -4002,7 +4078,7 @@ def reindex(self, *args, **kwargs): Returns ------- - reindexed : %(klass)s + %(klass)s with changed index. """ # TODO: Decide if we care about having different examples for different # kinds @@ -4074,11 +4150,10 @@ def _needs_reindex_multi(self, axes, method, level): def _reindex_multi(self, axes, copy, fill_value): return NotImplemented - _shared_docs[ - 'reindex_axis'] = ("""Conform input object to new index with optional - filling logic, placing NA/NaN in locations having no value in the - previous index. A new object is produced unless the new index is - equivalent to the current one and copy=False + _shared_docs['reindex_axis'] = ("""Conform input object to new index + with optional filling logic, placing NA/NaN in locations having + no value in the previous index. A new object is produced unless + the new index is equivalent to the current one and copy=False. Parameters ---------- @@ -4115,17 +4190,20 @@ def _reindex_multi(self, axes, copy, fill_value): .. versionadded:: 0.21.0 (list-like tolerance) - Examples - -------- - >>> df.reindex_axis(['A', 'B', 'C'], axis=1) - See Also -------- - reindex, reindex_like + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. 
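(Editor's sketch, not part of the patch: the ``tolerance`` rule described
above in action, with illustrative values and a monotonically increasing
index, as ``method`` requires.)

>>> s = pd.Series([1, 2, 3], index=[0, 10, 20])
>>> s.reindex([0, 9, 14], method='nearest', tolerance=2)
0     1.0
9     2.0
14    NaN
dtype: float64

Label 9 matches original label 10 because ``abs(10 - 9) <= 2``; label 14
stays ``NaN`` because its nearest original label, 10, is 4 away.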
Returns ------- - reindexed : %(klass)s + %(klass)s + + Examples + -------- + >>> df.reindex_axis(['A', 'B', 'C'], axis=1) """) @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs) From e5d31abe225edda99b80b0647e77c0579b0995e5 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 27 Nov 2018 02:31:49 +0100 Subject: [PATCH 02/78] DOC: Change code-block to ipython in r_interface.rst (#23906) --- doc/source/r_interface.rst | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/doc/source/r_interface.rst b/doc/source/r_interface.rst index d0b26016680692..f40f9199aaf668 100644 --- a/doc/source/r_interface.rst +++ b/doc/source/r_interface.rst @@ -33,10 +33,11 @@ See also the documentation of the `rpy2 `__ project: In the remainder of this page, a few examples of explicit conversion is given. The pandas conversion of rpy2 needs first to be activated: -.. code-block:: python +.. ipython:: + :verbatim: - >>> from rpy2.robjects import pandas2ri # doctest: +SKIP - >>> pandas2ri.activate() # doctest: +SKIP + In [1]: from rpy2.robjects import pandas2ri + ...: pandas2ri.activate() Transferring R data sets into Python ------------------------------------ @@ -44,11 +45,15 @@ Transferring R data sets into Python Once the pandas conversion is activated (``pandas2ri.activate()``), many conversions of R to pandas objects will be done automatically. For example, to obtain the 'iris' dataset as a pandas DataFrame: -.. code-block:: python +.. ipython:: + :verbatim: - >>> from rpy2.robjects import r # doctest: +SKIP - >>> r.data('iris') # doctest: +SKIP - >>> r['iris'].head() # doctest: +SKIP + In [2]: from rpy2.robjects import r + + In [3]: r.data('iris') + + In [4]: r['iris'].head() + Out[4]: Sepal.Length Sepal.Width Petal.Length Petal.Width Species 0 5.1 3.5 1.4 0.2 setosa 1 4.9 3.0 1.4 0.2 setosa @@ -66,14 +71,19 @@ Converting DataFrames into R objects The ``pandas2ri.py2ri`` function support the reverse operation to convert DataFrames into the equivalent R object (that is, **data.frame**): -.. code-block:: python +.. ipython:: + :verbatim: + + In [5]: df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}, + ...: index=["one", "two", "three"]) + + In [6]: r_dataframe = pandas2ri.py2ri(df) + + In [7]: print(type(r_dataframe)) + Out[7]: - >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}, - ... index=["one", "two", "three"]) # doctest: +SKIP - >>> r_dataframe = pandas2ri.py2ri(df) # doctest: +SKIP - >>> print(type(r_dataframe)) # doctest: +SKIP - - >>> print(r_dataframe) # doctest: +SKIP + In [8]: print(r_dataframe) + Out[8]: A B C one 1 4 7 two 2 5 8 From f8b6496e1ff727aab2fe3ac599f0429c27a47445 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 27 Nov 2018 02:35:21 +0100 Subject: [PATCH 03/78] Fix PEP-8 issues in timedeltas.rst (#23905) Signed-off-by: Fabian Haase --- doc/source/timedeltas.rst | 42 +++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index e602e45784f4a5..8dab39aafbf676 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -4,18 +4,12 @@ .. 
ipython:: python :suppress: - import datetime import numpy as np import pandas as pd + np.random.seed(123456) - randn = np.random.randn - randint = np.random.randint np.set_printoptions(precision=4, suppress=True) - pd.options.display.max_rows=15 - import dateutil - import pytz - from dateutil.relativedelta import relativedelta - from pandas.tseries.offsets import * + pd.options.display.max_rows = 15 .. _timedeltas.timedeltas: @@ -37,6 +31,8 @@ You can construct a ``Timedelta`` scalar through various arguments: .. ipython:: python + import datetime + # strings pd.Timedelta('1 days') pd.Timedelta('1 days 00:00:00') @@ -74,13 +70,14 @@ You can construct a ``Timedelta`` scalar through various arguments: .. ipython:: python - pd.Timedelta(Second(2)) + pd.Timedelta(pd.offsets.Second(2)) Further, operations among the scalars yield another scalar ``Timedelta``. .. ipython:: python - pd.Timedelta(Day(2)) + pd.Timedelta(Second(2)) + pd.Timedelta('00:00:00.000123') + pd.Timedelta(pd.offsets.Day(2)) + pd.Timedelta(pd.offsets.Second(2)) +\ + pd.Timedelta('00:00:00.000123') to_timedelta ~~~~~~~~~~~~ @@ -135,8 +132,8 @@ subtraction operations on ``datetime64[ns]`` Series, or ``Timestamps``. .. ipython:: python s = pd.Series(pd.date_range('2012-1-1', periods=3, freq='D')) - td = pd.Series([ pd.Timedelta(days=i) for i in range(3) ]) - df = pd.DataFrame(dict(A = s, B = td)) + td = pd.Series([pd.Timedelta(days=i) for i in range(3)]) + df = pd.DataFrame({'A': s, 'B': td}) df df['C'] = df['A'] + df['B'] df @@ -145,8 +142,8 @@ subtraction operations on ``datetime64[ns]`` Series, or ``Timestamps``. s - s.max() s - datetime.datetime(2011, 1, 1, 3, 5) s + datetime.timedelta(minutes=5) - s + Minute(5) - s + Minute(5) + Milli(5) + s + pd.offsets.Minute(5) + s + pd.offsets.Minute(5) + pd.offsets.Milli(5) Operations with scalars from a ``timedelta64[ns]`` series: @@ -184,7 +181,7 @@ Operands can also appear in a reversed order (a singular object operated with a A = s - pd.Timestamp('20120101') - pd.Timedelta('00:05:05') B = s - pd.Series(pd.date_range('2012-1-2', periods=3, freq='D')) - df = pd.DataFrame(dict(A=A, B=B)) + df = pd.DataFrame({'A': A, 'B': B}) df df.min() @@ -232,7 +229,8 @@ Numeric reduction operation for ``timedelta64[ns]`` will return ``Timedelta`` ob .. ipython:: python - y2 = pd.Series(pd.to_timedelta(['-1 days +00:00:05', 'nat', '-1 days +00:00:05', '1 days'])) + y2 = pd.Series(pd.to_timedelta(['-1 days +00:00:05', 'nat', + '-1 days +00:00:05', '1 days'])) y2 y2.mean() y2.median() @@ -250,8 +248,10 @@ Note that division by the NumPy scalar is true division, while astyping is equiv .. ipython:: python - td = pd.Series(pd.date_range('20130101', periods=4)) - \ - pd.Series(pd.date_range('20121201', periods=4)) + december = pd.Series(pd.date_range('20121201', periods=4)) + january = pd.Series(pd.date_range('20130101', periods=4)) + td = january - december + td[2] += datetime.timedelta(minutes=5, seconds=3) td[3] = np.nan td @@ -360,8 +360,8 @@ or ``np.timedelta64`` objects. Passing ``np.nan/pd.NaT/nat`` will represent miss .. ipython:: python - pd.TimedeltaIndex(['1 days', '1 days, 00:00:05', - np.timedelta64(2,'D'), datetime.timedelta(days=2,seconds=2)]) + pd.TimedeltaIndex(['1 days', '1 days, 00:00:05', np.timedelta64(2, 'D'), + datetime.timedelta(days=2, seconds=2)]) The string 'infer' can be passed in order to set the frequency of the index as the inferred frequency upon creation: @@ -458,7 +458,7 @@ Similarly to frequency conversion on a ``Series`` above, you can convert these i .. 
ipython:: python - tdi / np.timedelta64(1,'s') + tdi / np.timedelta64(1, 's') tdi.astype('timedelta64[s]') Scalars type ops work as well. These can potentially return a *different* type of index. From c05b73fb7ca79897e220e411cf70a521a1997a6c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 26 Nov 2018 17:43:17 -0800 Subject: [PATCH 04/78] implement deprecation portion of #23675 (#23937) --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/arrays/datetimes.py | 82 ++++++++++++++++++- pandas/core/indexes/datetimes.py | 50 +++++------ pandas/core/tools/datetimes.py | 12 ++- .../indexes/datetimes/test_construction.py | 40 +++++++++ 5 files changed, 157 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 6fd0a224b81b2b..1127d02f0a8221 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1043,6 +1043,7 @@ Deprecations `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`) - :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`) - Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`) +- Constructing a :class:`DatetimeIndex` from data with ``timedelta64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23675`) - The ``keep_tz=False`` option (the default) of the ``keep_tz`` keyword of :meth:`DatetimeIndex.to_series` is deprecated (:issue:`17832`). - Timezone converting a tz-aware ``datetime.datetime`` or :class:`Timestamp` with :class:`Timestamp` and the ``tz`` argument is now deprecated. Instead, use :meth:`Timestamp.tz_convert` (:issue:`23579`) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ae366149ab8999..41c28c5ee3b93b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -15,8 +15,9 @@ from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( - _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_int64_dtype, - is_object_dtype) + _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type, + is_float_dtype, is_int64_dtype, is_object_dtype, is_period_dtype, + is_timedelta64_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna @@ -1421,6 +1422,83 @@ def to_julian_date(self): DatetimeArrayMixin._add_datetimelike_methods() +# ------------------------------------------------------------------- +# Constructor Helpers + +def maybe_infer_tz(tz, inferred_tz): + """ + If a timezone is inferred from data, check that it is compatible with + the user-provided timezone, if any. + + Parameters + ---------- + tz : tzinfo or None + inferred_tz : tzinfo or None + + Returns + ------- + tz : tzinfo or None + + Raises + ------ + TypeError : if both timezones are present but do not match + """ + if tz is None: + tz = inferred_tz + elif inferred_tz is None: + pass + elif not timezones.tz_compare(tz, inferred_tz): + raise TypeError('data is already tz-aware {inferred_tz}, unable to ' + 'set specified tz: {tz}' + .format(inferred_tz=inferred_tz, tz=tz)) + return tz + + +def maybe_convert_dtype(data, copy): + """ + Convert data based on dtype conventions, issuing deprecation warnings + or errors where appropriate. 
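(Editor's sketch, not part of the patch: the user-visible effect of this
helper, mirroring the tests added later in this same patch.)

>>> pd.DatetimeIndex(np.array([0], dtype='m8[ns]'))  # emits FutureWarning
DatetimeIndex(['1970-01-01'], dtype='datetime64[ns]', freq=None)
>>> pd.DatetimeIndex(pd.PeriodIndex(['2016Q1'], freq='Q'))
Traceback (most recent call last):
    ...
TypeError: Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead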
+ Parameters + ---------- + data : np.ndarray or pd.Index + copy : bool + Returns + ------- + data : np.ndarray or pd.Index + copy : bool + Raises + ------ + TypeError : PeriodDType data is passed + """ + if is_float_dtype(data): + # Note: we must cast to datetime64[ns] here in order to treat these + # as wall-times instead of UTC timestamps. + data = data.astype(_NS_DTYPE) + copy = False + # TODO: deprecate this behavior to instead treat symmetrically + # with integer dtypes. See discussion in GH#23675 + + elif is_timedelta64_dtype(data): + warnings.warn("Passing timedelta64-dtype data is deprecated, will " + "raise a TypeError in a future version", + FutureWarning, stacklevel=3) + data = data.view(_NS_DTYPE) + + elif is_period_dtype(data): + # Note: without explicitly raising here, PeriondIndex + # test_setops.test_join_does_not_recur fails + raise TypeError("Passing PeriodDtype data is invalid. " + "Use `data.to_timestamp()` instead") + + elif is_extension_type(data) and not is_datetime64tz_dtype(data): + # Includes categorical + # TODO: We have no tests for these + data = np.array(data, dtype=np.object_) + copy = False + + return data, copy + + def _generate_regular_range(cls, start, end, periods, freq): """ Generate a range of dates with the spans between dates described by diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1ba14ffce383be..72bfefaf331514 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -16,16 +16,17 @@ from pandas.core.dtypes.common import ( _INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_dtype, - is_datetime64_ns_dtype, is_datetimetz, is_dtype_equal, is_float, - is_integer, is_integer_dtype, is_list_like, is_period_dtype, is_scalar, - is_string_like, pandas_dtype) + is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetimetz, + is_dtype_equal, is_float, is_integer, is_integer_dtype, is_list_like, + is_period_dtype, is_scalar, is_string_like, pandas_dtype) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.datetimes import ( - DatetimeArrayMixin as DatetimeArray, _to_m8) + DatetimeArrayMixin as DatetimeArray, _to_m8, maybe_convert_dtype, + maybe_infer_tz) from pandas.core.base import _shared_docs import pandas.core.common as com from pandas.core.indexes.base import Index, _index_shared_docs @@ -246,50 +247,49 @@ def __new__(cls, data=None, name = data.name freq, freq_infer = dtl.maybe_infer_freq(freq) + if freq is None and hasattr(data, "freq"): + # i.e. DatetimeArray/Index + freq = data.freq + verify_integrity = False # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) - if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArray)): - # other iterable of some kind - if not isinstance(data, (list, tuple)): + if not hasattr(data, "dtype"): + # e.g. list, tuple + if np.ndim(data) == 0: + # i.e. 
generator data = list(data) - data = np.asarray(data, dtype='O') + data = np.asarray(data) + copy = False elif isinstance(data, ABCSeries): data = data._values - # data must be Index or np.ndarray here + # By this point we are assured to have either a numpy array or Index + data, copy = maybe_convert_dtype(data, copy) + if not (is_datetime64_dtype(data) or is_datetimetz(data) or is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'): data = tools.to_datetime(data, dayfirst=dayfirst, yearfirst=yearfirst) - if isinstance(data, DatetimeArray): - if tz is None: - tz = data.tz - elif data.tz is None: - data = data.tz_localize(tz, ambiguous=ambiguous) - else: - # the tz's must match - if not timezones.tz_compare(tz, data.tz): - msg = ('data is already tz-aware {0}, unable to ' - 'set specified tz: {1}') - raise TypeError(msg.format(data.tz, tz)) - + if is_datetime64tz_dtype(data): + tz = maybe_infer_tz(tz, data.tz) subarr = data._data - if freq is None: - freq = data.freq - verify_integrity = False - elif issubclass(data.dtype.type, np.datetime64): + elif is_datetime64_dtype(data): + # tz-naive DatetimeArray/Index or ndarray[datetime64] + data = getattr(data, "_data", data) if data.dtype != _NS_DTYPE: data = conversion.ensure_datetime64ns(data) + if tz is not None: # Convert tz-naive to UTC tz = timezones.maybe_get_tz(tz) data = conversion.tz_localize_to_utc(data.view('i8'), tz, ambiguous=ambiguous) subarr = data.view(_NS_DTYPE) + else: # must be integer dtype otherwise # assume this data are epoch timestamps diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 2c6fdb3eaf03c5..ee44a64514f4f1 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -171,6 +171,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, - ndarray of Timestamps if box=False """ from pandas import DatetimeIndex + from pandas.core.arrays.datetimes import maybe_convert_dtype + if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') @@ -208,6 +210,11 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') + # warn if passing timedelta64, raise for PeriodDtype + # NB: this must come after unit transformation + orig_arg = arg + arg, _ = maybe_convert_dtype(arg, copy=False) + arg = ensure_object(arg) require_iso8601 = False @@ -231,7 +238,10 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, # shortcut formatting here if format == '%Y%m%d': try: - result = _attempt_YYYYMMDD(arg, errors=errors) + # pass orig_arg as float-dtype may have been converted to + # datetime64[ns] + orig_arg = ensure_object(orig_arg) + result = _attempt_YYYYMMDD(orig_arg, errors=errors) except (ValueError, TypeError, tslibs.OutOfBoundsDatetime): raise ValueError("cannot convert the input to " "'%Y%m%d' date format") diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 02755c7e58a1db..6651a27098630d 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -14,11 +14,51 @@ from pandas import ( DatetimeIndex, Index, Timestamp, date_range, datetime, offsets, to_datetime) +from pandas.core.arrays import period_array import pandas.util.testing as tm class TestDatetimeIndex(object): + def test_dti_with_period_data_raises(self): + # GH#23675 + data = pd.PeriodIndex(['2016Q1', '2016Q2'], freq='Q') + + with 
pytest.raises(TypeError, match="PeriodDtype data is invalid"): + DatetimeIndex(data) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + to_datetime(data) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + DatetimeIndex(period_array(data)) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + to_datetime(period_array(data)) + + def test_dti_with_timedelta64_data_deprecation(self): + # GH#23675 + data = np.array([0], dtype='m8[ns]') + with tm.assert_produces_warning(FutureWarning): + result = DatetimeIndex(data) + + assert result[0] == Timestamp('1970-01-01') + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = to_datetime(data) + + assert result[0] == Timestamp('1970-01-01') + + with tm.assert_produces_warning(FutureWarning): + result = DatetimeIndex(pd.TimedeltaIndex(data)) + + assert result[0] == Timestamp('1970-01-01') + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = to_datetime(pd.TimedeltaIndex(data)) + + assert result[0] == Timestamp('1970-01-01') + def test_construction_caching(self): df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3), From 11e2cb77534c8dac6dc6968dc5223baef94164b6 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Tue, 27 Nov 2018 01:50:20 +0000 Subject: [PATCH 05/78] Increase timeout for flakey test (#23849) --- pandas/tests/frame/test_apply.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index c43872bfc3ddb1..a3b72d223f9573 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -11,7 +11,7 @@ import warnings import numpy as np -from hypothesis import given +from hypothesis import given, settings from hypothesis.strategies import composite, dates, integers, sampled_from from pandas import (notna, DataFrame, Series, MultiIndex, date_range, @@ -1157,6 +1157,7 @@ def test_agg_cython_table_raises(self, df, func, expected, axis): df.agg(func, axis=axis) @given(index=indices(max_length=5), num_columns=integers(0, 5)) + @settings(deadline=1000) def test_frequency_is_original(self, index, num_columns): # GH 22150 original = index.copy() From feb9c6e37ea83ce4427b7748adde56a3481bcdf9 Mon Sep 17 00:00:00 2001 From: Kimi Li Date: Mon, 26 Nov 2018 21:24:45 -0500 Subject: [PATCH 06/78] DOC: Move content in doc/README.rst to doc/source/contributing.rst (#23840) --- doc/README.rst | 174 +------------------------------------------------ 1 file changed, 1 insertion(+), 173 deletions(-) diff --git a/doc/README.rst b/doc/README.rst index 12950d323f5d34..a11ed8d9d03e30 100644 --- a/doc/README.rst +++ b/doc/README.rst @@ -1,173 +1 @@ -.. _contributing.docs: - -Contributing to the documentation -================================= - -Whether you are someone who loves writing, teaching, or development, -contributing to the documentation is a huge value. If you don't see yourself -as a developer type, please don't stress and know that we want you to -contribute. You don't even have to be an expert on *pandas* to do so! -Something as simple as rewriting small passages for clarity -as you reference the docs is a simple but effective way to contribute. The -next person to read that passage will be in your debt! - -Actually, there are sections of the docs that are worse off by being written -by experts. 
If something in the docs doesn't make sense to you, updating the -relevant section after you figure it out is a simple way to ensure it will -help the next person. - -.. contents:: Table of contents: - :local: - - -About the pandas documentation ------------------------------- - -The documentation is written in **reStructuredText**, which is almost like writing -in plain English, and built using `Sphinx `__. The -Sphinx Documentation has an excellent `introduction to reST -`__. Review the Sphinx docs to perform more -complex changes to the documentation as well. - -Some other important things to know about the docs: - -- The pandas documentation consists of two parts: the docstrings in the code - itself and the docs in this folder ``pandas/doc/``. - - The docstrings provide a clear explanation of the usage of the individual - functions, while the documentation in this folder consists of tutorial-like - overviews per topic together with some other information (what's new, - installation, etc). - -- The docstrings follow the **Numpy Docstring Standard** which is used widely - in the Scientific Python community. This standard specifies the format of - the different sections of the docstring. See `this document - `_ - for a detailed explanation, or look at some of the existing functions to - extend it in a similar manner. - -- The tutorials make heavy use of the `ipython directive - `_ sphinx extension. - This directive lets you put code in the documentation which will be run - during the doc build. For example: - - :: - - .. ipython:: python - - x = 2 - x**3 - - will be rendered as - - :: - - In [1]: x = 2 - - In [2]: x**3 - Out[2]: 8 - - This means that almost all code examples in the docs are always run (and the - output saved) during the doc build. This way, they will always be up to date, - but it makes the doc building a bit more complex. - - -How to build the pandas documentation -------------------------------------- - -Requirements -^^^^^^^^^^^^ - -To build the pandas docs there are some extra requirements: you will need to -have ``sphinx`` and ``ipython`` installed. `numpydoc -`_ is used to parse the docstrings that -follow the Numpy Docstring Standard (see above), but you don't need to install -this because a local copy of ``numpydoc`` is included in the pandas source -code. `nbsphinx `_ is used to convert -Jupyter notebooks. You will need to install it if you intend to modify any of -the notebooks included in the documentation. - -Furthermore, it is recommended to have all `optional dependencies -`_ -installed. This is not needed, but be aware that you will see some error -messages. Because all the code in the documentation is executed during the doc -build, the examples using this optional dependencies will generate errors. -Run ``pd.show_versions()`` to get an overview of the installed version of all -dependencies. - -.. warning:: - - Sphinx version >= 1.2.2 or the older 1.1.3 is required. - -Building pandas -^^^^^^^^^^^^^^^ - -For a step-by-step overview on how to set up your environment, to work with -the pandas code and git, see `the developer pages -`_. -When you start to work on some docs, be sure to update your code to the latest -development version ('master'):: - - git fetch upstream - git rebase upstream/master - -Often it will be necessary to rebuild the C extension after updating:: - - python setup.py build_ext --inplace - -Building the documentation -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -So how do you build the docs? 
Navigate to your local folder -``pandas/doc/`` directory in the console and run:: - - python make.py html - -And then you can find the html output in the folder ``pandas/doc/build/html/``. - -The first time it will take quite a while, because it has to run all the code -examples in the documentation and build all generated docstring pages. -In subsequent evocations, sphinx will try to only build the pages that have -been modified. - -If you want to do a full clean build, do:: - - python make.py clean - python make.py build - - -Starting with 0.13.1 you can tell ``make.py`` to compile only a single section -of the docs, greatly reducing the turn-around time for checking your changes. -You will be prompted to delete `.rst` files that aren't required, since the -last committed version can always be restored from git. - -:: - - #omit autosummary and API section - python make.py clean - python make.py --no-api - - # compile the docs with only a single - # section, that which is in indexing.rst - python make.py clean - python make.py --single indexing - -For comparison, a full doc build may take 10 minutes. a ``-no-api`` build -may take 3 minutes and a single section may take 15 seconds. - -Where to start? ---------------- - -There are a number of issues listed under `Docs -`_ -and `good first issue -`_ -where you could start out. - -Or maybe you have an idea of your own, by using pandas, looking for something -in the documentation and thinking 'this can be improved', let's do something -about that! - -Feel free to ask questions on `mailing list -`_ or submit an -issue on Github. +See `contributing.rst `_ in this repo. From 5e0c392aabbbde49604bf58ed216bcc61b6b5794 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 27 Nov 2018 02:37:14 +0000 Subject: [PATCH 07/78] CLN: io/formats/html.py: refactor (#23818) --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/io/formats/format.py | 8 ++----- pandas/io/formats/html.py | 37 ++++++++++++++------------------- 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 1127d02f0a8221..0a066399e27ca8 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -303,6 +303,7 @@ Backwards incompatible API changes - A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`) - :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`) - Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`) +- ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`) .. _whatsnew_0240.api_breaking.deps: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 4c08ee89c33dfc..ff9ee9ed7296a5 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -730,12 +730,8 @@ def to_html(self, classes=None, notebook=False, border=None): .. 
versionadded:: 0.19.0 """ from pandas.io.formats.html import HTMLFormatter - html_renderer = HTMLFormatter(self, classes=classes, - max_rows=self.max_rows, - max_cols=self.max_cols, - notebook=notebook, - border=border, - table_id=self.table_id) + html_renderer = HTMLFormatter(self, classes=classes, notebook=notebook, + border=border, table_id=self.table_id) if hasattr(self.buf, 'write'): html_renderer.write_result(self.buf) elif isinstance(self.buf, compat.string_types): diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index bc2de210df3f48..bf92ce7ee0f679 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -25,8 +25,8 @@ class HTMLFormatter(TableFormatter): indent_delta = 2 - def __init__(self, formatter, classes=None, max_rows=None, max_cols=None, - notebook=False, border=None, table_id=None): + def __init__(self, formatter, classes=None, notebook=False, border=None, + table_id=None): self.fmt = formatter self.classes = classes @@ -35,18 +35,21 @@ def __init__(self, formatter, classes=None, max_rows=None, max_cols=None, self.elements = [] self.bold_rows = self.fmt.kwds.get('bold_rows', False) self.escape = self.fmt.kwds.get('escape', True) - - self.max_rows = max_rows or len(self.fmt.frame) - self.max_cols = max_cols or len(self.fmt.columns) self.show_dimensions = self.fmt.show_dimensions - self.is_truncated = (self.max_rows < len(self.fmt.frame) or - self.max_cols < len(self.fmt.columns)) self.notebook = notebook if border is None: border = get_option('display.html.border') self.border = border self.table_id = table_id + @property + def is_truncated(self): + return self.fmt.is_truncated + + @property + def ncols(self): + return len(self.fmt.tr_frame.columns) + def write(self, s, indent=0): rs = pprint_thing(s) self.elements.append(' ' * indent + rs) @@ -301,12 +304,8 @@ def _write_header(self, indent): if all((self.fmt.has_index_names, self.fmt.index, self.fmt.show_index_names)): - row = ([x if x is not None else '' - for x in self.frame.index.names] + - [''] * min(len(self.columns), self.max_cols)) - if truncate_h: - ins_col = row_levels + self.fmt.tr_col_num - row.insert(ins_col, '') + row = ([x if x is not None else '' for x in self.frame.index.names] + + [''] * (self.ncols + (1 if truncate_h else 0))) self.write_tr(row, indent, self.indent_delta, header=True) indent -= self.indent_delta @@ -318,9 +317,7 @@ def _write_body(self, indent): self.write('', indent) indent += self.indent_delta - fmt_values = {} - for i in range(min(len(self.columns), self.max_cols)): - fmt_values[i] = self.fmt._format_col(i) + fmt_values = {i: self.fmt._format_col(i) for i in range(self.ncols)} # write values if self.fmt.index and isinstance(self.frame.index, ABCMultiIndex): @@ -338,7 +335,6 @@ def _write_regular_rows(self, fmt_values, indent): truncate_h = self.fmt.truncate_h truncate_v = self.fmt.truncate_v - ncols = len(self.fmt.tr_frame.columns) nrows = len(self.fmt.tr_frame) if self.fmt.index: @@ -362,7 +358,7 @@ def _write_regular_rows(self, fmt_values, indent): row = [] if self.fmt.index: row.append(index_values[i]) - row.extend(fmt_values[j][i] for j in range(ncols)) + row.extend(fmt_values[j][i] for j in range(self.ncols)) if truncate_h: dot_col_ix = self.fmt.tr_col_num + row_levels @@ -376,7 +372,6 @@ def _write_hierarchical_rows(self, fmt_values, indent): truncate_h = self.fmt.truncate_h truncate_v = self.fmt.truncate_v frame = self.fmt.tr_frame - ncols = len(frame.columns) nrows = len(frame) row_levels = self.frame.index.nlevels @@ -454,7 +449,7 @@ def 
_write_hierarchical_rows(self, fmt_values, indent): j += 1 row.append(v) - row.extend(fmt_values[j][i] for j in range(ncols)) + row.extend(fmt_values[j][i] for j in range(self.ncols)) if truncate_h: row.insert(row_levels - sparse_offset + self.fmt.tr_col_num, '...') @@ -466,7 +461,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): sparsify=False, adjoin=False, names=False))) row = [] row.extend(idx_values[i]) - row.extend(fmt_values[j][i] for j in range(ncols)) + row.extend(fmt_values[j][i] for j in range(self.ncols)) if truncate_h: row.insert(row_levels + self.fmt.tr_col_num, '...') self.write_tr(row, indent, self.indent_delta, tags=None, From 6af9c30023df41cb333b08032f54d8b1fda4f922 Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Tue, 27 Nov 2018 03:50:22 +0100 Subject: [PATCH 08/78] CLN/DEPS: Clean up post numpy bump to 1.12 (#23796) --- README.md | 2 +- ci/azure/linux.yml | 2 +- pandas/_libs/algos_rank_helper.pxi.in | 10 +------ pandas/_libs/lib.pyx | 4 +-- pandas/_libs/sparse.pyx | 8 ------ pandas/_libs/sparse_op_helper.pxi.in | 7 ----- pandas/core/arrays/categorical.py | 12 ++------ pandas/core/arrays/datetimelike.py | 3 +- pandas/core/arrays/sparse.py | 3 -- pandas/core/dtypes/cast.py | 26 +++-------------- pandas/core/dtypes/dtypes.py | 7 +---- pandas/core/indexes/datetimes.py | 5 ---- pandas/core/internals/blocks.py | 22 ++------------- pandas/core/internals/managers.py | 28 +++---------------- pandas/core/reshape/tile.py | 3 +- pandas/io/packers.py | 2 +- pandas/io/pickle.py | 8 ------ pandas/plotting/_misc.py | 3 -- pandas/tests/arrays/categorical/test_repr.py | 1 - pandas/tests/dtypes/test_inference.py | 5 +--- pandas/tests/frame/test_constructors.py | 8 +++--- pandas/tests/frame/test_operators.py | 1 - pandas/tests/indexes/common.py | 5 +--- pandas/tests/indexes/multi/test_analytics.py | 5 +--- .../indexing/test_chaining_and_caching.py | 1 - pandas/tests/indexing/test_iloc.py | 2 +- pandas/tests/io/test_pytables.py | 2 -- pandas/tests/plotting/test_datetimelike.py | 1 - pandas/tests/plotting/test_frame.py | 13 +++------ pandas/tests/plotting/test_series.py | 4 +-- pandas/tests/series/test_analytics.py | 10 ++----- pandas/tests/test_base.py | 9 +++--- pandas/tests/test_nanops.py | 1 - pandas/tests/test_panel.py | 1 - pandas/tests/test_sorting.py | 7 ----- 35 files changed, 40 insertions(+), 191 deletions(-) diff --git a/README.md b/README.md index b4dedecb4c6971..1993b1ecb9dc1b 100644 --- a/README.md +++ b/README.md @@ -171,7 +171,7 @@ pip install pandas ``` ## Dependencies -- [NumPy](https://www.numpy.org): 1.9.0 or higher +- [NumPy](https://www.numpy.org): 1.12.0 or higher - [python-dateutil](https://labix.org/python-dateutil): 2.5.0 or higher - [pytz](https://pythonhosted.org/pytz): 2011k or higher diff --git a/ci/azure/linux.yml b/ci/azure/linux.yml index a773a06c193d45..7fa8a9a1783f90 100644 --- a/ci/azure/linux.yml +++ b/ci/azure/linux.yml @@ -9,7 +9,7 @@ jobs: strategy: maxParallel: 11 matrix: - py27_np_19: + py27_np_120: ENV_FILE: ci/deps/azure-27-compat.yaml CONDA_PY: "27" TEST_ARGS: "--skip-slow --skip-network" diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index 5ffc6dd5780235..5dac94394c7ed0 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -102,15 +102,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ranks = np.empty(n, dtype='f8') {{if dtype == 'object'}} - - try: - _as = np.lexsort(keys=order) - 
except TypeError: - # lexsort on object array will raise TypeError for numpy version - # earlier than 1.11.0. Use argsort with order argument instead. - _dt = [('values', 'O'), ('mask', '?')] - _values = np.asarray(list(zip(order[0], order[1])), dtype=_dt) - _as = np.argsort(_values, kind='mergesort', order=('mask', 'values')) + _as = np.lexsort(keys=order) {{else}} if tiebreak == TIEBREAK_FIRST: # need to use a stable sort here diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index ad538ff103c2f3..874206378f79cd 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -518,9 +518,7 @@ def astype_intsafe(ndarray[object] arr, new_dtype): bint is_datelike ndarray result - # on 32-bit, 1.6.2 numpy M8[ns] is a subdtype of integer, which is weird - is_datelike = new_dtype in ['M8[ns]', 'm8[ns]'] - + is_datelike = new_dtype == 'm8[ns]' result = np.empty(n, dtype=new_dtype) for i in range(n): val = arr[i] diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 668bd0ae6bbb75..f5980998f6db47 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -8,14 +8,6 @@ from numpy cimport (ndarray, uint8_t, int64_t, int32_t, int16_t, int8_t, cnp.import_array() -from distutils.version import LooseVersion - -# numpy versioning -_np_version = np.version.short_version -_np_version_under1p10 = LooseVersion(_np_version) < LooseVersion('1.10') -_np_version_under1p11 = LooseVersion(_np_version) < LooseVersion('1.11') - - # ----------------------------------------------------------------------------- # Preamble stuff diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index 1f41096a3f1942..c6621ab5977caf 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -42,13 +42,6 @@ cdef inline sparse_t __mod__(sparse_t a, sparse_t b): cdef inline sparse_t __floordiv__(sparse_t a, sparse_t b): if b == 0: if sparse_t is float64_t: - # numpy >= 1.11 returns NaN - # for a // 0, rather than +-inf - if _np_version_under1p11: - if a > 0: - return INF - elif a < 0: - return -INF return NaN else: return 0 diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6dc3a960dc817b..24df4c7a9f3798 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -98,7 +98,7 @@ def f(self, other): ret[na_mask] = False return ret - # Numpy-1.9 and earlier may convert a scalar to a zerodim array during + # Numpy < 1.13 may convert a scalar to a zerodim array during # comparison operation when second arg has higher priority, e.g. # # cat[0] < cat @@ -2038,15 +2038,7 @@ def __setitem__(self, key, value): elif isinstance(key, slice): pass - # Array of True/False in Series or Categorical - else: - # There is a bug in numpy, which does not accept a Series as a - # indexer - # https://github.com/pandas-dev/pandas/issues/6168 - # https://github.com/numpy/numpy/issues/4240 -> fixed in numpy 1.9 - # FIXME: remove when numpy 1.9 is the lowest numpy version pandas - # accepts... 
- key = np.asarray(key) + # else: array of True/False in Series or Categorical lindexer = self.categories.get_indexer(rvalue) lindexer = self._maybe_coerce_indexer(lindexer) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4e784d9c89c5f1..83ee335aa54657 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -835,8 +835,7 @@ def __isub__(self, other): def _evaluate_compare(self, other, op): """ We have been called because a comparison between - 8 aware arrays. numpy >= 1.11 will - now warn about NaT comparisons + 8 aware arrays. numpy will warn about NaT comparisons """ # Called by comparison methods when comparing datetimelike # with datetimelike diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 24e33c32d08986..9a5ef3b3a7dd0e 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -1015,9 +1015,6 @@ def __getitem__(self, key): key = np.asarray(key) if com.is_bool_indexer(key) and len(self) == len(key): - # TODO(numpy 1.11): Remove this asarray. - # Old NumPy didn't treat array-like as boolean masks. - key = np.asarray(key) return self.take(np.arange(len(key), dtype=np.int32)[key]) elif hasattr(key, '__len__'): return self.take(key) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3c5f8830441f71..afe6ba45bb4005 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -263,28 +263,10 @@ def maybe_promote(dtype, fill_value=np.nan): fill_value = np.nan # returns tuple of (dtype, fill_value) - if issubclass(dtype.type, (np.datetime64, np.timedelta64)): - # for now: refuse to upcast datetime64 - # (this is because datetime64 will not implicitly upconvert - # to object correctly as of numpy 1.6.1) - if isna(fill_value): - fill_value = iNaT - else: - if issubclass(dtype.type, np.datetime64): - try: - fill_value = tslibs.Timestamp(fill_value).value - except Exception: - # the proper thing to do here would probably be to upcast - # to object (but numpy 1.6.1 doesn't do this properly) - fill_value = iNaT - elif issubclass(dtype.type, np.timedelta64): - try: - fill_value = tslibs.Timedelta(fill_value).value - except Exception: - # as for datetimes, cannot upcast to object - fill_value = iNaT - else: - fill_value = iNaT + if issubclass(dtype.type, np.datetime64): + fill_value = tslibs.Timestamp(fill_value).value + elif issubclass(dtype.type, np.timedelta64): + fill_value = tslibs.Timedelta(fill_value).value elif is_datetimetz(dtype): if isna(fill_value): fill_value = iNaT diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index bd7c6630c7c5df..fee983f9692216 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -338,12 +338,7 @@ def _hash_categories(categories, ordered=True): cat_array = [cat_array] hashed = _combine_hash_arrays(iter(cat_array), num_items=len(cat_array)) - if len(hashed) == 0: - # bug in Numpy<1.12 for length 0 arrays. 
Just return the correct - # value of 0 - return 0 - else: - return np.bitwise_xor.reduce(hashed) + return np.bitwise_xor.reduce(hashed) @classmethod def construct_array_type(cls): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 72bfefaf331514..61e8d6344a0e9e 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -415,11 +415,6 @@ def __setstate__(self, state): self._freq = own_state[1] self._tz = timezones.tz_standardize(own_state[2]) - # provide numpy < 1.7 compat - if nd_state[2] == 'M8[us]': - new_state = np.ndarray.__reduce__(data.astype('M8[ns]')) - np.ndarray.__setstate__(data, new_state[2]) - else: # pragma: no cover data = np.empty(state) np.ndarray.__setstate__(data, state) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 857bf18c5982be..21dae455cac1b5 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1458,11 +1458,6 @@ def quantile(self, qs, interpolation='linear', axis=0, axes=None): def _nanpercentile1D(values, mask, q, **kw): # mask is Union[ExtensionArray, ndarray] - # we convert to an ndarray for NumPy 1.9 compat, which didn't - # treat boolean-like arrays as boolean. This conversion would have - # been done inside ndarray.__getitem__ anyway, since values is - # an ndarray at this point. - mask = np.asarray(mask) values = values[~mask] if len(values) == 0: @@ -2781,9 +2776,7 @@ def set(self, locs, values, check=False): ------- None """ - if values.dtype != _NS_DTYPE: - # Workaround for numpy 1.6 bug - values = conversion.ensure_datetime64ns(values) + values = conversion.ensure_datetime64ns(values, copy=False) self.values[locs] = values @@ -3102,7 +3095,7 @@ def _merge_blocks(blocks, dtype=None, _can_consolidate=True): # FIXME: optimization potential in case all mgrs contain slices and # combination of those slices is a slice, too. 
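    # Editor's note (not part of the patch): the lines below stack every
    # block's values and then reorder the stacked rows by the sorted manager
    # locations, so the merged block is laid out in manager order. With
    # numpy >= 1.12 now required, np.vstack handles datetime64/timedelta64
    # directly, which is why the _vstack i8-view workaround removed further
    # down is no longer needed.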
new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks]) - new_values = _vstack([b.values for b in blocks], dtype) + new_values = np.vstack([b.values for b in blocks]) argsort = np.argsort(new_mgr_locs) new_values = new_values[argsort] @@ -3114,17 +3107,6 @@ def _merge_blocks(blocks, dtype=None, _can_consolidate=True): return blocks -def _vstack(to_stack, dtype): - - # work around NumPy 1.6 bug - if dtype == _NS_DTYPE or dtype == _TD_DTYPE: - new_values = np.vstack([x.view('i8') for x in to_stack]) - return new_values.view(dtype) - - else: - return np.vstack(to_stack) - - def _block2d_to_blocknd(values, placement, shape, labels, ref_items): """ pivot to the labels shape """ panel_shape = (len(placement),) + shape diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c3762d98191533..5f9860ce98b111 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -248,9 +248,6 @@ def __getstate__(self): def __setstate__(self, state): def unpickle_block(values, mgr_locs): - # numpy < 1.7 pickle compat - if values.dtype == 'M8[us]': - values = values.astype('M8[ns]') return make_block(values, placement=mgr_locs) if (isinstance(state, tuple) and len(state) >= 4 and @@ -776,18 +773,6 @@ def _interleave(self): result = np.empty(self.shape, dtype=dtype) - if result.shape[0] == 0: - # Workaround for numpy 1.7 bug: - # - # >>> a = np.empty((0,10)) - # >>> a[slice(0,0)] - # array([], shape=(0, 10), dtype=float64) - # >>> a[[]] - # Traceback (most recent call last): - # File "", line 1, in - # IndexError: index 0 is out of bounds for axis 0 with size 0 - return result - itemmask = np.zeros(self.shape[0]) for blk in self.blocks: @@ -1170,8 +1155,7 @@ def insert(self, loc, item, value, allow_duplicates=False): blk.mgr_locs = new_mgr_locs if loc == self._blklocs.shape[0]: - # np.append is a lot faster (at least in numpy 1.7.1), let's use it - # if we can. + # np.append is a lot faster, let's use it if we can. self._blklocs = np.append(self._blklocs, 0) self._blknos = np.append(self._blknos, len(self.blocks)) else: @@ -1995,13 +1979,9 @@ def _transform_index(index, func, level=None): def _fast_count_smallints(arr): """Faster version of set(arr) for sequences of small numbers.""" - if len(arr) == 0: - # Handle empty arr case separately: numpy 1.6 chokes on that. 
- return np.empty((0, 2), dtype=arr.dtype) - else: - counts = np.bincount(arr.astype(np.int_)) - nz = counts.nonzero()[0] - return np.c_[nz, counts[nz]] + counts = np.bincount(arr.astype(np.int_)) + nz = counts.nonzero()[0] + return np.c_[nz, counts[nz]] def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill): diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 4a863372eea137..8ad2a48e8767c4 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -334,8 +334,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, ids = ensure_int64(bins.searchsorted(x, side=side)) if include_lowest: - # Numpy 1.9 support: ensure this mask is a Numpy array - ids[np.asarray(x == bins[0])] = 1 + ids[x == bins[0]] = 1 na_mask = isna(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() diff --git a/pandas/io/packers.py b/pandas/io/packers.py index cec7908f143a8f..7fc770efbeb377 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -250,7 +250,7 @@ def dtype_for(t): 'complex128': np.float64, 'complex64': np.float32} -# numpy 1.6.1 compat +# windows (32 bit) compat if hasattr(np, 'float128'): c2f_dict['complex256'] = np.float128 diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index c89d1df8ee64ba..789f55a62dc58c 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -6,8 +6,6 @@ from pandas.compat import PY3, BytesIO, cPickle as pkl, pickle_compat as pc -from pandas.core.dtypes.common import _NS_DTYPE, is_datetime64_dtype - from pandas.io.common import _get_handle, _stringify_path @@ -200,10 +198,4 @@ def _pickle_array(arr): def _unpickle_array(bytes): arr = read_array(BytesIO(bytes)) - # All datetimes should be stored as M8[ns]. When unpickling with - # numpy1.6, it will read these as M8[us]. 
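# A small sketch of the datetime64 unit normalization that the removed
# workaround used to perform by hand: microsecond-resolution values from
# old pickles become the 'M8[ns]' unit pandas stores internally.
import numpy as np

arr_us = np.array(['2018-11-27T00:12:13'], dtype='M8[us]')
arr_ns = arr_us.astype('M8[ns]')   # same instants, nanosecond resolution
assert arr_ns.dtype == np.dtype('M8[ns]')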
So this ensures all - # datetime64 types are read as MS[ns] - if is_datetime64_dtype(arr): - arr = arr.view(_NS_DTYPE) - return arr diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index aeb97a84e594af..dbad5a04161c98 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -138,9 +138,6 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, def _get_marker_compat(marker): import matplotlib.lines as mlines - import matplotlib as mpl - if mpl.__version__ < '1.1.0' and marker == '.': - return 'o' if marker not in mlines.lineMarkers: return 'o' return marker diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index 5f71d0148ee88f..227edf60951e68 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -37,7 +37,6 @@ def test_big_print(self): def test_empty_print(self): factor = Categorical([], ["a", "b", "c"]) expected = ("[], Categories (3, object): [a, b, c]") - # hack because array_repr changed in numpy > 1.6.x actual = repr(factor) assert actual == expected diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index fd3222cd1119bd..20ad39e137d466 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1272,10 +1272,7 @@ def test_nan_to_nat_conversions(): s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) assert (isna(s[8])) - # numpy < 1.7.0 is wrong - from distutils.version import LooseVersion - if LooseVersion(np.__version__) >= LooseVersion('1.7.0'): - assert (s[8].value == np.datetime64('NaT').astype(np.int64)) + assert (s[8].value == np.datetime64('NaT').astype(np.int64)) @td.skip_if_no_scipy diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c71d5d9f977f69..76e92042cbe6a4 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -15,7 +15,7 @@ from pandas.core.dtypes.common import is_integer_dtype from pandas.compat import (lmap, long, zip, range, lrange, lzip, - OrderedDict, is_platform_little_endian, PY36) + OrderedDict, is_platform_little_endian, PY3, PY36) from pandas import compat from pandas import (DataFrame, Index, Series, isna, MultiIndex, Timedelta, Timestamp, @@ -164,9 +164,9 @@ def test_constructor_dtype_str_na_values(self, string_dtype): def test_constructor_rec(self): rec = self.frame.to_records(index=False) - - # Assigning causes segfault in NumPy < 1.5.1 - # rec.dtype.names = list(rec.dtype.names)[::-1] + if PY3: + # unicode error under PY2 + rec.dtype.names = list(rec.dtype.names)[::-1] index = self.frame.index diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index bbe4914b5f4472..88c64bf9e9b978 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -190,7 +190,6 @@ def _check_unary_op(op): _check_bin_op(operator.or_) _check_bin_op(operator.xor) - # operator.neg is deprecated in numpy >= 1.9 _check_unary_op(operator.inv) # TODO: belongs elsewhere def test_logical_with_nas(self): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 9f5885fb80bba3..0c886b9fd3c4ba 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -663,12 +663,9 @@ def test_equals_op(self): tm.assert_series_equal(series_a == item, Series(expected3)) def test_numpy_ufuncs(self): - # test ufuncs of numpy 1.9.2. 
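# A minimal sketch of what test_numpy_ufuncs exercises, assuming a plain
# numeric Index: NumPy ufuncs apply element-wise, with no version-specific
# skips needed on supported NumPy releases.
import numpy as np
import pandas as pd

idx = pd.Index([1.0, 4.0, 9.0])
result = np.sqrt(idx)   # element-wise: 1.0, 2.0, 3.0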
see: + # test ufuncs of numpy, see: # http://docs.scipy.org/doc/numpy/reference/ufuncs.html - # some functions are skipped because it may return different result - # for unicode input depending on numpy version - for name, idx in compat.iteritems(self.indices): for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index 05adaada01ee50..3b40b2afe9c6d8 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -275,12 +275,9 @@ def test_map_dictlike(idx, mapper): np.rad2deg ]) def test_numpy_ufuncs(func): - # test ufuncs of numpy 1.9.2. see: + # test ufuncs of numpy. see: # http://docs.scipy.org/doc/numpy/reference/ufuncs.html - # some functions are skipped because it may return different result - # for unicode input depending on numpy version - # copy and paste from idx fixture as pytest doesn't support # parameters and fixtures at the same time. major_axis = Index(['foo', 'bar', 'baz', 'qux']) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 8bc8cb3fb15354..f012c9c255cd91 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -94,7 +94,6 @@ class TestChaining(object): def test_setitem_chained_setfault(self): # GH6026 - # setfaults under numpy 1.7.1 (ok on 1.8) data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout'] mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none'] diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 53d07aeef304aa..85b06001cf8a0d 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -337,7 +337,7 @@ def test_iloc_setitem_list(self): tm.assert_frame_equal(df, expected) def test_iloc_setitem_pandas_object(self): - # GH 17193, affecting old numpy (1.7 and 1.8) + # GH 17193 s_orig = Series([0, 1, 2, 3]) expected = Series([0, -1, -2, 3]) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 4a68719eedc9aa..84a0e3d867783e 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -199,8 +199,6 @@ def roundtrip(key, obj, **kwargs): def test_long_strings(self): # GH6166 - # unconversion of long strings was being chopped in earlier - # versions of numpy < 1.7.2 df = DataFrame({'a': tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10)) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 4865638671ea92..2e204f6d18d708 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -1075,7 +1075,6 @@ def test_irreg_dtypes(self): _, ax = self.plt.subplots() _check_plot_works(df.plot, ax=ax) - @pytest.mark.xfail(not PY3, reason="failing on mpl 1.4.3 on PY2") @pytest.mark.slow def test_time(self): t = datetime(1, 1, 1, 3, 30, 0) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 25dfbaba762c95..f5708b24d22b13 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -69,8 +69,7 @@ def test_plot(self): self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) df = DataFrame({'x': [1, 2], 'y': [3, 4]}) - # mpl >= 1.5.2 (or slightly below) throw AttributError - with 
pytest.raises((TypeError, AttributeError)): + with pytest.raises(AttributeError, match='Unknown property blarg'): df.plot.line(blarg=True) df = DataFrame(np.random.rand(10, 3), @@ -2967,13 +2966,9 @@ def test_passed_bar_colors(self): def test_rcParams_bar_colors(self): import matplotlib as mpl color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] - try: # mpl 1.5 - with mpl.rc_context( - rc={'axes.prop_cycle': mpl.cycler("color", color_tuples)}): - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") - except (AttributeError, KeyError): # mpl 1.4 - with mpl.rc_context(rc={'axes.color_cycle': color_tuples}): - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + with mpl.rc_context( + rc={'axes.prop_cycle': mpl.cycler("color", color_tuples)}): + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] @pytest.mark.parametrize('method', ['line', 'barh', 'bar']) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index dc708278836d2f..e6519c7db7a7bf 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -767,8 +767,8 @@ def test_errorbar_plot(self): s.plot(yerr=np.arange(11)) s_err = ['zzz'] * 10 - # in mpl 1.5+ this is a TypeError - with pytest.raises((ValueError, TypeError)): + # MPL > 2.0.0 will most likely use TypeError here + with pytest.raises((TypeError, ValueError)): s.plot(yerr=s_err) @td.xfail_if_mpl_2_2 diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 0ce9f885eb6389..89d76abaf5f827 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1015,12 +1015,6 @@ def test_clip_with_datetimes(self): def test_cummethods_bool(self): # GH 6270 - # looks like a buggy np.maximum.accumulate for numpy 1.6.1, py 3.2 - def cummin(x): - return np.minimum.accumulate(x) - - def cummax(x): - return np.maximum.accumulate(x) a = pd.Series([False, False, False, True, True, False, False]) b = ~a @@ -1028,8 +1022,8 @@ def cummax(x): d = ~c methods = {'cumsum': np.cumsum, 'cumprod': np.cumprod, - 'cummin': cummin, - 'cummax': cummax} + 'cummin': np.minimum.accumulate, + 'cummax': np.maximum.accumulate} args = product((a, b, c, d), methods) for s, method in args: expected = Series(methods[method](s.values)) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 084477d8202b10..c7efc1efaee8f5 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -292,11 +292,10 @@ def test_none_comparison(self): assert not result.iat[0] assert not result.iat[1] - # this fails for numpy < 1.9 - # and oddly for *some* platforms - # result = None != o # noqa - # assert result.iat[0] - # assert result.iat[1] + result = None != o # noqa + assert result.iat[0] + assert result.iat[1] + if (is_datetime64_dtype(o) or is_datetimetz(o)): # Following DatetimeIndex (and Timestamp) convention, # inequality comparisons with Series[datetime64] raise diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 49dbccb82fac85..e214d4c1985a97 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -464,7 +464,6 @@ def test_nankurt(self): allow_str=False, allow_date=False, allow_tdelta=False) - @td.skip_if_no("numpy", min_version="1.10.0") def test_nanprod(self): self.check_funs(nanops.nanprod, np.prod, allow_str=False, allow_date=False, allow_tdelta=False, diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 
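# Sketch of the accumulate idiom the cummethods test now uses directly:
# np.minimum.accumulate / np.maximum.accumulate compute running extrema,
# so no local wrapper functions are needed on modern NumPy.
import numpy as np

a = np.array([False, False, False, True, True, False, False])
np.maximum.accumulate(a)   # [F, F, F, T, T, T, T]
np.minimum.accumulate(a)   # [F, F, F, F, F, F, F]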
6d5d07b00398c0..c0c4e627b1b2e6 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -85,7 +85,6 @@ def test_sum(self): def test_mean(self): self._check_stat_op('mean', np.mean) - @td.skip_if_no("numpy", min_version="1.10.0") def test_prod(self): self._check_stat_op('prod', np.prod, skipna_alternative=np.nanprod) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 22e758a0e59a72..333b93dbdf580e 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -127,13 +127,6 @@ def test_nargsort(self): # np.argsort(items2) may not place NaNs first items2 = np.array(items, dtype='O') - try: - # GH 2785; due to a regression in NumPy1.6.2 - np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) - np.argsort(items2, kind='mergesort') - except TypeError: - pytest.skip('requested sort not available for type') - # mergesort is the most difficult to get right because we want it to be # stable. From 90961f2acad5c2db6aa957e2a79df8668f5b02d1 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 27 Nov 2018 03:51:35 +0100 Subject: [PATCH 09/78] DOC: Fix PEP-8 issues in 10min.rst (#23908) --- doc/source/10min.rst | 64 +++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/doc/source/10min.rst b/doc/source/10min.rst index b5938a24ce6c50..ddcb9b5fc26c75 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -5,19 +5,19 @@ .. ipython:: python :suppress: + import os import numpy as np + import pandas as pd - import os + np.random.seed(123456) np.set_printoptions(precision=4, suppress=True) - import matplotlib - # matplotlib.style.use('default') pd.options.display.max_rows = 15 - #### portions of this were borrowed from the - #### Pandas cheatsheet - #### created during the PyData Workshop-Sprint 2012 - #### Hannah Chen, Henry Chow, Eric Cox, Robert Mauriello + # portions of this were borrowed from the + # Pandas cheatsheet + # created during the PyData Workshop-Sprint 2012 + # Hannah Chen, Henry Chow, Eric Cox, Robert Mauriello ******************** @@ -31,9 +31,8 @@ Customarily, we import as follows: .. ipython:: python - import pandas as pd import numpy as np - import matplotlib.pyplot as plt + import pandas as pd Object Creation --------------- @@ -55,7 +54,7 @@ and labeled columns: dates = pd.date_range('20130101', periods=6) dates - df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD')) + df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD')) df Creating a ``DataFrame`` by passing a dict of objects that can be converted to series-like. @@ -64,7 +63,7 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s df2 = pd.DataFrame({'A': 1., 'B': pd.Timestamp('20130102'), - 'C': pd.Series(1, index=list(range(4)),dtype='float32'), + 'C': pd.Series(1, index=list(range(4)), dtype='float32'), 'D': np.array([3] * 4, dtype='int32'), 'E': pd.Categorical(["test", "train", "test", "train"]), 'F': 'foo'}) @@ -190,31 +189,31 @@ Selecting on a multi-axis by label: .. ipython:: python - df.loc[:,['A','B']] + df.loc[:, ['A', 'B']] Showing label slicing, both endpoints are *included*: .. ipython:: python - df.loc['20130102':'20130104',['A','B']] + df.loc['20130102':'20130104', ['A', 'B']] Reduction in the dimensions of the returned object: .. ipython:: python - df.loc['20130102',['A','B']] + df.loc['20130102', ['A', 'B']] For getting a scalar value: .. 
ipython:: python - df.loc[dates[0],'A'] + df.loc[dates[0], 'A'] For getting fast access to a scalar (equivalent to the prior method): .. ipython:: python - df.at[dates[0],'A'] + df.at[dates[0], 'A'] Selection by Position ~~~~~~~~~~~~~~~~~~~~~ @@ -231,37 +230,37 @@ By integer slices, acting similar to numpy/python: .. ipython:: python - df.iloc[3:5,0:2] + df.iloc[3:5, 0:2] By lists of integer position locations, similar to the numpy/python style: .. ipython:: python - df.iloc[[1,2,4],[0,2]] + df.iloc[[1, 2, 4], [0, 2]] For slicing rows explicitly: .. ipython:: python - df.iloc[1:3,:] + df.iloc[1:3, :] For slicing columns explicitly: .. ipython:: python - df.iloc[:,1:3] + df.iloc[:, 1:3] For getting a value explicitly: .. ipython:: python - df.iloc[1,1] + df.iloc[1, 1] For getting fast access to a scalar (equivalent to the prior method): .. ipython:: python - df.iat[1,1] + df.iat[1, 1] Boolean Indexing ~~~~~~~~~~~~~~~~ @@ -303,19 +302,19 @@ Setting values by label: .. ipython:: python - df.at[dates[0],'A'] = 0 + df.at[dates[0], 'A'] = 0 Setting values by position: .. ipython:: python - df.iat[0,1] = 0 + df.iat[0, 1] = 0 Setting by assigning with a NumPy array: .. ipython:: python - df.loc[:,'D'] = np.array([5] * len(df)) + df.loc[:, 'D'] = np.array([5] * len(df)) The result of the prior setting operations. @@ -345,7 +344,7 @@ returns a copy of the data. .. ipython:: python df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E']) - df1.loc[dates[0]:dates[1],'E'] = 1 + df1.loc[dates[0]:dates[1], 'E'] = 1 df1 To drop any rows that have missing data. @@ -653,7 +652,8 @@ pandas can include categorical data in a ``DataFrame``. For full docs, see the .. ipython:: python - df = pd.DataFrame({"id":[1, 2, 3, 4, 5, 6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']}) + df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], + "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) Convert the raw grades to a categorical data type. @@ -674,7 +674,8 @@ Reorder the categories and simultaneously add the missing categories (methods un .. ipython:: python - df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"]) + df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", + "good", "very good"]) df["grade"] Sorting is per order in the categories, not lexical order. @@ -703,7 +704,8 @@ See the :ref:`Plotting ` docs. .. 
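# A condensed, plain-Python sketch of the categorical workflow shown in
# 10min.rst above (three rows instead of six; otherwise the same calls).
import pandas as pd

df = pd.DataFrame({"id": [1, 2, 3], "raw_grade": ['a', 'b', 'b']})
df["grade"] = df["raw_grade"].astype("category")
df["grade"].cat.categories = ["very good", "good"]
df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium",
                                              "good", "very good"])
df.sort_values(by="grade")   # categorical order, not lexical order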
ipython:: python - ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000)) + ts = pd.Series(np.random.randn(1000), + index=pd.date_range('1/1/2000', periods=1000)) ts = ts.cumsum() @savefig series_plot_basic.png @@ -718,8 +720,10 @@ of the columns with labels: columns=['A', 'B', 'C', 'D']) df = df.cumsum() + plt.figure() + df.plot() @savefig frame_plot_basic.png - plt.figure(); df.plot(); plt.legend(loc='best') + plt.legend(loc='best') Getting Data In/Out ------------------- From 448c9bc32ef49a40bef5f4c0f7d0fc055cd0d4a7 Mon Sep 17 00:00:00 2001 From: Varad Gunjal Date: Tue, 27 Nov 2018 15:46:58 +0530 Subject: [PATCH 10/78] DOC: Capitalize docstring summaries (#23886) --- pandas/core/arrays/timedeltas.py | 8 +- pandas/core/frame.py | 130 ++++++----- pandas/core/groupby/base.py | 11 +- pandas/core/groupby/groupby.py | 130 ++++++----- pandas/core/indexes/base.py | 332 ++++++++++++++++++---------- pandas/core/indexes/datetimelike.py | 45 ++-- pandas/core/indexes/multi.py | 6 +- pandas/core/indexes/range.py | 2 +- pandas/core/panel.py | 66 +++--- pandas/core/resample.py | 139 +++++++----- pandas/core/series.py | 154 +++++++------ pandas/core/window.py | 103 +++++---- pandas/io/formats/style.py | 58 +++-- pandas/io/json/json.py | 60 +++-- pandas/io/json/normalize.py | 11 +- pandas/io/pytables.py | 4 +- pandas/tseries/offsets.py | 146 ++++++++---- 17 files changed, 873 insertions(+), 532 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index d1e6d979b554c4..da26966cfa94c3 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -241,7 +241,7 @@ def _add_offset(self, other): def _add_delta(self, delta): """ Add a timedelta-like, Tick, or TimedeltaIndex-like object - to self, yielding a new TimedeltaArray + to self, yielding a new TimedeltaArray. Parameters ---------- @@ -256,7 +256,9 @@ def _add_delta(self, delta): return type(self)(new_values, freq='infer') def _add_datetime_arraylike(self, other): - """Add DatetimeArray/Index or ndarray[datetime64] to TimedeltaArray""" + """ + Add DatetimeArray/Index or ndarray[datetime64] to TimedeltaArray. + """ if isinstance(other, np.ndarray): # At this point we have already checked that dtype is datetime64 from pandas.core.arrays import DatetimeArrayMixin @@ -404,7 +406,7 @@ def total_seconds(self): def to_pytimedelta(self): """ Return Timedelta Array/Index as object ndarray of datetime.timedelta - objects + objects. Returns ------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 592825a7ea63b7..9481689d4099ac 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -285,7 +285,8 @@ class DataFrame(NDFrame): - """ Two-dimensional size-mutable, potentially heterogeneous tabular data + """ + Two-dimensional size-mutable, potentially heterogeneous tabular data structure with labeled axes (rows and columns). Arithmetic operations align on both row and column labels. Can be thought of as a dict-like container for Series objects. The primary pandas data structure. @@ -651,10 +652,11 @@ def _repr_fits_vertical_(self): def _repr_fits_horizontal_(self, ignore_width=False): """ Check if full repr fits in horizontal boundaries imposed by the display - options width and max_columns. In case off non-interactive session, no - boundaries apply. + options width and max_columns. + + In case off non-interactive session, no boundaries apply. 
- ignore_width is here so ipnb+HTML output can behave the way + `ignore_width` is here so ipnb+HTML output can behave the way users expect. display.max_columns remains in effect. GH3541, GH3573 """ @@ -702,14 +704,16 @@ def _repr_fits_horizontal_(self, ignore_width=False): return repr_width < width def _info_repr(self): - """True if the repr should show the info view.""" + """ + True if the repr should show the info view. + """ info_repr_option = (get_option("display.large_repr") == "info") return info_repr_option and not (self._repr_fits_horizontal_() and self._repr_fits_vertical_()) def __unicode__(self): """ - Return a string representation for a particular DataFrame + Return a string representation for a particular DataFrame. Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. @@ -734,6 +738,7 @@ def __unicode__(self): def _repr_html_(self): """ Return a html representation for a particular DataFrame. + Mainly for IPython notebook. """ # qtconsole doesn't report its line width, and also @@ -974,12 +979,14 @@ def itertuples(self, index=True, name="Pandas"): items = iteritems def __len__(self): - """Returns length of info axis, but here we use the index """ + """ + Returns length of info axis, but here we use the index. + """ return len(self.index) def dot(self, other): """ - Matrix multiplication with DataFrame or Series objects. Can also be + Matrix multiplication with DataFrame or Series objects. Can also be called using `self @ other` in Python >= 3.5. Parameters @@ -1024,11 +1031,15 @@ def dot(self, other): raise TypeError('unsupported type: {oth}'.format(oth=type(other))) def __matmul__(self, other): - """ Matrix multiplication using binary `@` operator in Python>=3.5 """ + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ return self.dot(other) def __rmatmul__(self, other): - """ Matrix multiplication using binary `@` operator in Python>=3.5 """ + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ return self.T.dot(np.transpose(other)).T # ---------------------------------------------------------------------- @@ -1354,7 +1365,7 @@ def to_gbq(self, destination_table, project_id=None, chunksize=None, def from_records(cls, data, index=None, exclude=None, columns=None, coerce_float=False, nrows=None): """ - Convert structured or record ndarray to DataFrame + Convert structured or record ndarray to DataFrame. Parameters ---------- @@ -1579,7 +1590,8 @@ def to_records(self, index=True, convert_datetime64=None): @classmethod def from_items(cls, items, columns=None, orient='columns'): - """Construct a dataframe from a list of tuples + """ + Construct a DataFrame from a list of tuples. .. deprecated:: 0.23.0 `from_items` is deprecated and will be removed in a future version. @@ -1673,7 +1685,8 @@ def _from_arrays(cls, arrays, columns, index, dtype=None): def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, encoding=None, tupleize_cols=None, infer_datetime_format=False): - """Read CSV file. + """ + Read CSV file. .. deprecated:: 0.21.0 Use :func:`pandas.read_csv` instead. @@ -1953,7 +1966,7 @@ def to_stata(self, fname, convert_dates=None, write_index=True, def to_feather(self, fname): """ - write out the binary feather-format for DataFrames + Write out the binary feather-format for DataFrames. .. 
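# A small sketch of the matrix multiplication described for DataFrame.dot
# above; on Python >= 3.5 the `@` operator dispatches to the same method.
import pandas as pd

a = pd.DataFrame([[1, 2], [3, 4]], columns=['x', 'y'])
b = pd.DataFrame([[5, 6], [7, 8]], index=['x', 'y'])
result = a.dot(b)   # aligns a's columns with b's index; same as a @ b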
versionadded:: 0.20.0 @@ -2610,7 +2623,8 @@ def _unpickle_matrix_compat(self, state): # pragma: no cover # Getting and setting elements def get_value(self, index, col, takeable=False): - """Quickly retrieve single value at passed column and index + """ + Quickly retrieve single value at passed column and index. .. deprecated:: 0.21.0 Use .at[] or .iat[] accessors instead. @@ -2653,7 +2667,8 @@ def _get_value(self, index, col, takeable=False): _get_value.__doc__ = get_value.__doc__ def set_value(self, index, col, value, takeable=False): - """Put single value at passed column and index + """ + Put single value at passed column and index. .. deprecated:: 0.21.0 Use .at[] or .iat[] accessors instead. @@ -2698,18 +2713,17 @@ def _set_value(self, index, col, value, takeable=False): def _ixs(self, i, axis=0): """ + Parameters + ---------- i : int, slice, or sequence of integers axis : int - """ + Notes + ----- + If slice passed, the resulting data will be a view. + """ # irow if axis == 0: - """ - Notes - ----- - If slice passed, the resulting data will be a view - """ - if isinstance(i, slice): return self[i] else: @@ -2735,12 +2749,6 @@ def _ixs(self, i, axis=0): # icol else: - """ - Notes - ----- - If slice passed, the resulting data will be a view - """ - label = self.columns[i] if isinstance(i, slice): # need to return view @@ -2887,7 +2895,8 @@ def _getitem_frame(self, key): return self.where(key) def query(self, expr, inplace=False, **kwargs): - """Query the columns of a frame with a boolean expression. + """ + Query the columns of a DataFrame with a boolean expression. Parameters ---------- @@ -3223,7 +3232,9 @@ def _box_item_values(self, key, values): return self._box_col_values(values, items) def _box_col_values(self, values, items): - """ provide boxed values for a column """ + """ + Provide boxed values for a column. + """ klass = self._constructor_sliced return klass(values, index=self.index, name=items, fastpath=True) @@ -3289,8 +3300,8 @@ def _setitem_frame(self, key, value): def _ensure_valid_index(self, value): """ - ensure that if we don't have an index, that we can create one from the - passed value + Ensure that if we don't have an index, that we can create one from the + passed value. """ # GH5632, make sure that we are a Series convertible if not len(self.index) and is_list_like(value): @@ -3552,7 +3563,9 @@ def _series(self): return result def lookup(self, row_labels, col_labels): - """Label-based "fancy indexing" function for DataFrame. + """ + Label-based "fancy indexing" function for DataFrame. + Given equal-length arrays of row and column labels, return an array of the values corresponding to each (row, col) pair. @@ -3639,7 +3652,9 @@ def _reindex_columns(self, new_columns, method, copy, level, allow_dups=False) def _reindex_multi(self, axes, copy, fill_value): - """ we are guaranteed non-Nones in the axes! """ + """ + We are guaranteed non-Nones in the axes. + """ new_index, row_indexer = self.index.reindex(axes['index']) new_columns, col_indexer = self.columns.reindex(axes['columns']) @@ -3819,7 +3834,8 @@ def drop(self, labels=None, axis=0, index=None, columns=None, ('inplace', False), ('level', None)]) def rename(self, *args, **kwargs): - """Alter axes labels. + """ + Alter axes labels. Function / dict values must be unique (1-to-1). Labels not contained in a dict / Series will be left as-is. 
Extra labels listed don't throw an @@ -4473,7 +4489,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, def drop_duplicates(self, subset=None, keep='first', inplace=False): """ Return DataFrame with duplicate rows removed, optionally only - considering certain columns + considering certain columns. Parameters ---------- @@ -4507,7 +4523,7 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False): def duplicated(self, subset=None, keep='first'): """ Return boolean Series denoting duplicate rows, optionally only - considering certain columns + considering certain columns. Parameters ---------- @@ -4884,7 +4900,7 @@ def nsmallest(self, n, columns, keep='first'): def swaplevel(self, i=-2, j=-1, axis=0): """ - Swap levels i and j in a MultiIndex on a particular axis + Swap levels i and j in a MultiIndex on a particular axis. Parameters ---------- @@ -4911,8 +4927,8 @@ def swaplevel(self, i=-2, j=-1, axis=0): def reorder_levels(self, order, axis=0): """ - Rearrange index levels using input order. - May not drop or duplicate levels + Rearrange index levels using input order. May not drop or + duplicate levels. Parameters ---------- @@ -5501,7 +5517,7 @@ def pivot(self, index=None, columns=None, values=None): _shared_docs['pivot_table'] = """ Create a spreadsheet-style pivot table as a DataFrame. The levels in the pivot table will be stored in MultiIndex objects (hierarchical - indexes) on the index and columns of the result DataFrame + indexes) on the index and columns of the result DataFrame. Parameters ----------%s @@ -5803,9 +5819,11 @@ def unstack(self, level=-1, fill_value=None): """ Pivot a level of the (necessarily hierarchical) index labels, returning a DataFrame having a new level of column labels whose inner-most level - consists of the pivoted index labels. If the index is not a MultiIndex, - the output will be a Series (the analogue of stack when the columns are - not a MultiIndex). + consists of the pivoted index labels. + + If the index is not a MultiIndex, the output will be a Series + (the analogue of stack when the columns are not a MultiIndex). + The level involved will automatically get sorted. Parameters @@ -5861,7 +5879,7 @@ def unstack(self, level=-1, fill_value=None): return unstack(self, level, fill_value) _shared_docs['melt'] = (""" - "Unpivots" a DataFrame from wide format to long format, optionally + Unpivots a DataFrame from wide format to long format, optionally leaving identifier variables set. This function is useful to massage a DataFrame into a format where one @@ -6067,8 +6085,7 @@ def _gotitem(self, ): # type: (...) -> Union[Series, DataFrame] """ - sub-classes to define - return a sliced object + Sub-classes to define. Return a sliced object. Parameters ---------- @@ -6819,7 +6836,7 @@ def _series_round(s, decimals): def corr(self, method='pearson', min_periods=1): """ - Compute pairwise correlation of columns, excluding NA/null values + Compute pairwise correlation of columns, excluding NA/null values. Parameters ---------- @@ -7414,7 +7431,9 @@ def idxmax(self, axis=0, skipna=True): return Series(result, index=self._get_agg_axis(axis)) def _get_agg_axis(self, axis_num): - """ let's be explicit about this """ + """ + Let's be explicit about this. 
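# Sketch of the wide-to-long reshape described in the melt docstring
# above; the column names here are illustrative only.
import pandas as pd

wide = pd.DataFrame({'id': [1, 2], 'height': [1.7, 1.8], 'weight': [60, 75]})
melted = pd.melt(wide, id_vars=['id'], value_vars=['height', 'weight'],
                 var_name='measure', value_name='reading')
# one row per (id, measure) pair instead of one column per measure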
+ """ if axis_num == 0: return self.columns elif axis_num == 1: @@ -7604,7 +7623,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, def to_timestamp(self, freq=None, how='start', axis=0, copy=True): """ - Cast to DatetimeIndex of timestamps, at *beginning* of period + Cast to DatetimeIndex of timestamps, at *beginning* of period. Parameters ---------- @@ -7640,7 +7659,7 @@ def to_timestamp(self, freq=None, how='start', axis=0, copy=True): def to_period(self, freq=None, axis=0, copy=True): """ Convert DataFrame from DatetimeIndex to PeriodIndex with desired - frequency (inferred from index if not passed) + frequency (inferred from index if not passed). Parameters ---------- @@ -7780,6 +7799,7 @@ def isin(self, values): def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): """ Segregate Series based on type and coerce into matrices. + Needs to handle a lot of exceptional cases. """ # figure out the index, if necessary @@ -7888,7 +7908,7 @@ def convert(v): def _to_arrays(data, columns, coerce_float=False, dtype=None): """ - Return list of arrays, columns + Return list of arrays, columns. """ if isinstance(data, DataFrame): if columns is not None: @@ -7934,7 +7954,9 @@ def _to_arrays(data, columns, coerce_float=False, dtype=None): def _masked_rec_array_to_mgr(data, index, columns, dtype, copy): - """ extract from a masked rec array and create the manager """ + """ + Extract from a masked rec array and create the manager. + """ # essentially process a record array then fill it fill_value = data.fill_value diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 1bf97690a84ed7..a148f7e0cab87b 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -12,11 +12,15 @@ class GroupByMixin(object): - """ provide the groupby facilities to the mixed object """ + """ + Provide the groupby facilities to the mixed object. + """ @staticmethod def _dispatch(name, *args, **kwargs): - """ dispatch to apply """ + """ + Dispatch to apply. + """ def outer(self, *args, **kwargs): def f(x): @@ -28,8 +32,7 @@ def f(x): def _gotitem(self, key, ndim, subset=None): """ - sub-classes to define - return a sliced object + Sub-classes to define. Return a sliced object. Parameters ---------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b68fdf853ab192..292d4207cf2c5c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -291,7 +291,7 @@ class providing the base-class of operations. class GroupByPlot(PandasObject): """ - Class implementing the .plot attribute for groupby objects + Class implementing the .plot attribute for groupby objects. """ def __init__(self, groupby): @@ -314,7 +314,7 @@ def f(self): @contextmanager def _group_selection_context(groupby): """ - set / reset the _group_selection_context + Set / reset the _group_selection_context. """ groupby._set_group_selection() yield groupby @@ -377,14 +377,16 @@ def __unicode__(self): def _assure_grouper(self): """ - we create the grouper on instantiation - sub-classes may have a different policy + We create the grouper on instantiation sub-classes may have a + different policy. """ pass @property def groups(self): - """ dict {group name -> group labels} """ + """ + Dict {group name -> group labels}. + """ self._assure_grouper() return self.grouper.groups @@ -395,14 +397,16 @@ def ngroups(self): @property def indices(self): - """ dict {group name -> group indices} """ + """ + Dict {group name -> group indices}. 
+ """ self._assure_grouper() return self.grouper.indices def _get_indices(self, names): """ - safe get multiple indices, translate keys for - datelike to underlying repr + Safe get multiple indices, translate keys for + datelike to underlying repr. """ def get_converter(s): @@ -450,7 +454,9 @@ def get_converter(s): return [self.indices.get(name, []) for name in names] def _get_index(self, name): - """ safe get index, translate keys for datelike to underlying repr """ + """ + Safe get index, translate keys for datelike to underlying repr. + """ return self._get_indices([name])[0] @cache_readonly @@ -465,8 +471,10 @@ def _selected_obj(self): def _reset_group_selection(self): """ - Clear group based selection. Used for methods needing to return info on - each group regardless of whether a group selection was previously set. + Clear group based selection. + + Used for methods needing to return info on each group regardless of + whether a group selection was previously set. """ if self._group_selection is not None: # GH12839 clear cached selection too when changing group selection @@ -475,8 +483,9 @@ def _reset_group_selection(self): def _set_group_selection(self): """ - Create group based selection. Used when selection is not passed - directly but instead via a grouper. + Create group based selection. + + Used when selection is not passed directly but instead via a grouper. NOTE: this should be paired with a call to _reset_group_selection """ @@ -617,7 +626,7 @@ def curried(x): def get_group(self, name, obj=None): """ - Constructs NDFrame from group with provided name + Constructs NDFrame from group with provided name. Parameters ---------- @@ -643,7 +652,7 @@ def get_group(self, name, obj=None): def __iter__(self): """ - Groupby iterator + Groupby iterator. Returns ------- @@ -743,11 +752,11 @@ def _cumcount_array(self, ascending=True): def _try_cast(self, result, obj, numeric_only=False): """ - try to cast the result to our obj original type, - we may have roundtripped thru object in the mean-time + Try to cast the result to our obj original type, + we may have roundtripped through object in the mean-time. - if numeric_only is True, then only try to cast numerics - and not datetimelikes + If numeric_only is True, then only try to cast numerics + and not datetimelikes. """ if obj.ndim > 1: @@ -945,8 +954,9 @@ def _apply_filter(self, indices, dropna): class GroupBy(_GroupBy): """ - Class for grouping and aggregating relational data. See aggregate, - transform, and apply functions on this object. + Class for grouping and aggregating relational data. + + See aggregate, transform, and apply functions on this object. It's easiest to use obj.groupby(...) to use GroupBy, but you can also do: @@ -1010,7 +1020,9 @@ class GroupBy(_GroupBy): Number of groups """ def _bool_agg(self, val_test, skipna): - """Shared func to call any / all Cython GroupBy implementations""" + """ + Shared func to call any / all Cython GroupBy implementations. + """ def objs_to_bool(vals): try: @@ -1036,7 +1048,7 @@ def result_to_bool(result): @Appender(_doc_template) def any(self, skipna=True): """ - Returns True if any value in the group is truthful, else False + Returns True if any value in the group is truthful, else False. Parameters ---------- @@ -1049,7 +1061,7 @@ def any(self, skipna=True): @Appender(_doc_template) def all(self, skipna=True): """ - Returns True if all values in the group are truthful, else False + Returns True if all values in the group are truthful, else False. 
Parameters ---------- @@ -1061,7 +1073,9 @@ def all(self, skipna=True): @Substitution(name='groupby') @Appender(_doc_template) def count(self): - """Compute count of group, excluding missing values""" + """ + Compute count of group, excluding missing values. + """ # defined here for API doc raise NotImplementedError @@ -1127,7 +1141,7 @@ def mean(self, *args, **kwargs): @Appender(_doc_template) def median(self, **kwargs): """ - Compute median of groups, excluding missing values + Compute median of groups, excluding missing values. For multiple groupings, the result index will be a MultiIndex """ @@ -1148,9 +1162,9 @@ def f(x): @Appender(_doc_template) def std(self, ddof=1, *args, **kwargs): """ - Compute standard deviation of groups, excluding missing values + Compute standard deviation of groups, excluding missing values. - For multiple groupings, the result index will be a MultiIndex + For multiple groupings, the result index will be a MultiIndex. Parameters ---------- @@ -1166,9 +1180,9 @@ def std(self, ddof=1, *args, **kwargs): @Appender(_doc_template) def var(self, ddof=1, *args, **kwargs): """ - Compute variance of groups, excluding missing values + Compute variance of groups, excluding missing values. - For multiple groupings, the result index will be a MultiIndex + For multiple groupings, the result index will be a MultiIndex. Parameters ---------- @@ -1192,9 +1206,9 @@ def var(self, ddof=1, *args, **kwargs): @Appender(_doc_template) def sem(self, ddof=1): """ - Compute standard error of the mean of groups, excluding missing values + Compute standard error of the mean of groups, excluding missing values. - For multiple groupings, the result index will be a MultiIndex + For multiple groupings, the result index will be a MultiIndex. Parameters ---------- @@ -1207,7 +1221,9 @@ def sem(self, ddof=1): @Substitution(name='groupby') @Appender(_doc_template) def size(self): - """Compute group sizes""" + """ + Compute group sizes. + """ result = self.grouper.size() if isinstance(self.obj, Series): @@ -1216,7 +1232,9 @@ def size(self): @classmethod def _add_numeric_operations(cls): - """ add numeric operations to the GroupBy generically """ + """ + Add numeric operations to the GroupBy generically. + """ def groupby_function(name, alias, npfunc, numeric_only=True, _convert=False, @@ -1293,7 +1311,8 @@ def last(x): @Appender(_doc_template) def ohlc(self): """ - Compute sum of values, excluding missing values + Compute sum of values, excluding missing values. + For multiple groupings, the result index will be a MultiIndex """ @@ -1421,9 +1440,7 @@ def resample(self, rule, *args, **kwargs): @Appender(_doc_template) def rolling(self, *args, **kwargs): """ - Return a rolling grouper, providing rolling - functionality per group - + Return a rolling grouper, providing rolling functionality per group. """ from pandas.core.window import RollingGroupby return RollingGroupby(self, *args, **kwargs) @@ -1433,14 +1450,14 @@ def rolling(self, *args, **kwargs): def expanding(self, *args, **kwargs): """ Return an expanding grouper, providing expanding - functionality per group - + functionality per group. """ from pandas.core.window import ExpandingGroupby return ExpandingGroupby(self, *args, **kwargs) def _fill(self, direction, limit=None): - """Shared function for `pad` and `backfill` to call Cython method + """ + Shared function for `pad` and `backfill` to call Cython method. 
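# Usage sketch for the pad/backfill wrappers built on _fill, as described
# above: missing values are filled within each group, never across groups.
import numpy as np
import pandas as pd

df = pd.DataFrame({'key': ['a', 'a', 'b', 'b'],
                   'val': [1.0, np.nan, np.nan, 4.0]})
df.groupby('key')['val'].pad()        # [1.0, 1.0, NaN, 4.0]
df.groupby('key')['val'].backfill()   # [1.0, NaN, 4.0, 4.0]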
Parameters ---------- @@ -1474,7 +1491,7 @@ def _fill(self, direction, limit=None): @Substitution(name='groupby') def pad(self, limit=None): """ - Forward fill the values + Forward fill the values. Parameters ---------- @@ -1494,7 +1511,7 @@ def pad(self, limit=None): @Substitution(name='groupby') def backfill(self, limit=None): """ - Backward fill the values + Backward fill the values. Parameters ---------- @@ -1830,7 +1847,9 @@ def rank(self, method='average', ascending=True, na_option='keep', @Substitution(name='groupby') @Appender(_doc_template) def cumprod(self, axis=0, *args, **kwargs): - """Cumulative product for each group""" + """ + Cumulative product for each group. + """ nv.validate_groupby_func('cumprod', args, kwargs, ['numeric_only', 'skipna']) if axis != 0: @@ -1841,7 +1860,9 @@ def cumprod(self, axis=0, *args, **kwargs): @Substitution(name='groupby') @Appender(_doc_template) def cumsum(self, axis=0, *args, **kwargs): - """Cumulative sum for each group""" + """ + Cumulative sum for each group. + """ nv.validate_groupby_func('cumsum', args, kwargs, ['numeric_only', 'skipna']) if axis != 0: @@ -1852,7 +1873,9 @@ def cumsum(self, axis=0, *args, **kwargs): @Substitution(name='groupby') @Appender(_doc_template) def cummin(self, axis=0, **kwargs): - """Cumulative min for each group""" + """ + Cumulative min for each group. + """ if axis != 0: return self.apply(lambda x: np.minimum.accumulate(x, axis)) @@ -1861,7 +1884,9 @@ def cummin(self, axis=0, **kwargs): @Substitution(name='groupby') @Appender(_doc_template) def cummax(self, axis=0, **kwargs): - """Cumulative max for each group""" + """ + Cumulative max for each group. + """ if axis != 0: return self.apply(lambda x: np.maximum.accumulate(x, axis)) @@ -1873,7 +1898,8 @@ def _get_cythonized_result(self, how, grouper, aggregate=False, result_is_index=False, pre_processing=None, post_processing=None, **kwargs): - """Get result for Cythonized functions + """ + Get result for Cythonized functions. Parameters ---------- @@ -1968,7 +1994,7 @@ def _get_cythonized_result(self, how, grouper, aggregate=False, @Appender(_doc_template) def shift(self, periods=1, freq=None, axis=0): """ - Shift each group by periods observations + Shift each group by periods observations. Parameters ---------- @@ -1991,7 +2017,9 @@ def shift(self, periods=1, freq=None, axis=0): @Appender(_doc_template) def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, axis=0): - """Calculate pct_change of each value to previous entry in group""" + """ + Calculate pct_change of each value to previous entry in group. + """ if freq is not None or axis != 0: return self.apply(lambda x: x.pct_change(periods=periods, fill_method=fill_method, @@ -2035,7 +2063,7 @@ def head(self, n=5): @Appender(_doc_template) def tail(self, n=5): """ - Returns last n rows of each group + Returns last n rows of each group. Essentially equivalent to ``.apply(lambda x: x.tail(n))``, except ignores as_index flag. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 28aefb652adb0c..22c348acaf3413 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -150,8 +150,9 @@ class InvalidIndexError(Exception): def _new_Index(cls, d): - """ This is called upon unpickling, rather than the default which doesn't - have arguments and breaks __new__ + """ + This is called upon unpickling, rather than the default which doesn't + have arguments and breaks __new__. 
""" # required for backward compat, because PI can't be instantiated with # ordinals through __new__ GH #13277 @@ -164,7 +165,7 @@ def _new_Index(cls, d): class Index(IndexOpsMixin, PandasObject): """ Immutable ndarray implementing an ordered, sliceable set. The basic object - storing axis labels for all pandas objects + storing axis labels for all pandas objects. Parameters ---------- @@ -490,8 +491,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, @classmethod def _simple_new(cls, values, name=None, dtype=None, **kwargs): """ - we require the we have a dtype compat for the values - if we are passed a non-dtype compat, then coerce using the constructor + We require that we have a dtype compat for the values. If we are passed + a non-dtype compat, then coerce using the constructor. Must be careful not to recurse. """ @@ -518,9 +519,9 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs): return result._reset_identity() _index_shared_docs['_shallow_copy'] = """ - create a new Index with the same class as the caller, don't copy the + Create a new Index with the same class as the caller, don't copy the data, use the same object attributes with passed in attributes taking - precedence + precedence. *this is an internal non-public method* @@ -550,9 +551,9 @@ def _shallow_copy(self, values=None, **kwargs): def _shallow_copy_with_infer(self, values, **kwargs): """ - create a new Index inferring the class with passed value, don't copy + Create a new Index inferring the class with passed value, don't copy the data, use the same object attributes with passed in attributes - taking precedence + taking precedence. *this is an internal non-public method* @@ -575,11 +576,11 @@ def _shallow_copy_with_infer(self, values, **kwargs): def _deepcopy_if_needed(self, orig, copy=False): """ - .. versionadded:: 0.19.0 - Make a copy of self if data coincides (in memory) with orig. Subclasses should override this if self._base is not an ndarray. + .. versionadded:: 0.19.0 + Parameters ---------- orig : ndarray @@ -608,7 +609,9 @@ def _update_inplace(self, result, **kwargs): raise TypeError("Index can't be updated inplace") def _sort_levels_monotonic(self): - """ compat with MultiIndex """ + """ + Compat with MultiIndex. + """ return self _index_shared_docs['_get_grouper_for_level'] = """ @@ -643,7 +646,7 @@ def _get_grouper_for_level(self, mapper, level=None): def is_(self, other): """ - More flexible, faster check like ``is`` but that works through views + More flexible, faster check like ``is`` but that works through views. Note: this is *not* the same as ``Index.identical()``, which checks that metadata is also the same. @@ -662,24 +665,28 @@ def is_(self, other): other, '_id', Ellipsis) and self._id is not None def _reset_identity(self): - """Initializes or resets ``_id`` attribute with new object""" + """ + Initializes or resets ``_id`` attribute with new object. + """ self._id = _Identity() return self # ndarray compat def __len__(self): """ - return the length of the Index + Return the length of the Index. """ return len(self._data) def __array__(self, dtype=None): - """ the array interface, return my values """ + """ + The array interface, return my values. + """ return self._data.view(np.ndarray) def __array_wrap__(self, result, context=None): """ - Gets called after a ufunc + Gets called after a ufunc. 
""" if is_bool_dtype(result): return result @@ -690,24 +697,31 @@ def __array_wrap__(self, result, context=None): @cache_readonly def dtype(self): - """ return the dtype object of the underlying data """ + """ + Return the dtype object of the underlying data. + """ return self._data.dtype @cache_readonly def dtype_str(self): - """ return the dtype str of the underlying data """ + """ + Return the dtype str of the underlying data. + """ return str(self.dtype) @property def values(self): - """ return the underlying data as an ndarray """ + """ + Return the underlying data as an ndarray. + """ return self._data.view(np.ndarray) @property def _values(self): # type: () -> Union[ExtensionArray, Index, np.ndarray] # TODO(EA): remove index types as they become extension arrays - """The best array representation. + """ + The best array representation. This is an ndarray, ExtensionArray, or Index subclass. This differs from ``_ndarray_values``, which always returns an ndarray. @@ -825,12 +839,12 @@ def repeat(self, repeats, *args, **kwargs): return self._shallow_copy(self._values.repeat(repeats)) _index_shared_docs['where'] = """ - .. versionadded:: 0.19.0 - Return an Index of same shape as self and whose corresponding entries are from self where cond is True and otherwise are from other. + .. versionadded:: 0.19.0 + Parameters ---------- cond : boolean array-like with the same length as self @@ -862,7 +876,7 @@ def where(self, cond, other=None): def ravel(self, order='C'): """ - return an ndarray of the flattened values of the underlying data + Return an ndarray of the flattened values of the underlying data. See Also -------- @@ -927,8 +941,16 @@ def _string_data_error(cls, data): @classmethod def _coerce_to_ndarray(cls, data): - """coerces data to ndarray, raises on scalar data. Converts other - iterables to list first and then to array. Does not touch ndarrays. + """ + Coerces data to ndarray. + + Converts other iterables to list first and then to array. + Does not touch ndarrays. + + Raises + ------ + TypeError + When the data passed in is a scalar. """ if not isinstance(data, (np.ndarray, Index)): @@ -942,7 +964,9 @@ def _coerce_to_ndarray(cls, data): return data def _get_attributes_dict(self): - """ return an attributes dict for my class """ + """ + Return an attributes dict for my class. + """ return {k: getattr(self, k, None) for k in self._attributes} def view(self, cls=None): @@ -959,7 +983,7 @@ def view(self, cls=None): def _coerce_scalar_to_index(self, item): """ - we need to coerce a scalar to a compat for our index type + We need to coerce a scalar to a compat for our index type. Parameters ---------- @@ -1078,13 +1102,13 @@ def _format_space(self): @property def _formatter_func(self): """ - Return the formatter function + Return the formatter function. """ return default_pprint def _format_data(self, name=None): """ - Return the formatted data as a unicode string + Return the formatted data as a unicode string. """ # do we want to justify (only do so for non-objects) @@ -1097,7 +1121,7 @@ def _format_data(self, name=None): def _format_attrs(self): """ - Return a list of tuples of the (attr,formatted_value) + Return a list of tuples of the (attr,formatted_value). """ return format_object_attrs(self) @@ -1124,7 +1148,7 @@ def to_flat_index(self): def to_series(self, index=None, name=None): """ Create a Series with both index and values equal to the index keys - useful with map for returning an indexer based on an index + useful with map for returning an indexer based on an index. 
Parameters ---------- @@ -1251,7 +1275,9 @@ def astype(self, dtype, copy=True): raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) def _to_safe_for_reshape(self): - """ convert to object if we are a categorical """ + """ + Convert to object if we are a categorical. + """ return self def _assert_can_do_setop(self, other): @@ -1268,11 +1294,15 @@ def _convert_can_do_setop(self, other): return other, result_name def _convert_for_op(self, value): - """ Convert value to be insertable to ndarray """ + """ + Convert value to be insertable to ndarray. + """ return value def _assert_can_do_op(self, value): - """ Check value is valid for scalar op """ + """ + Check value is valid for scalar op. + """ if not is_scalar(value): msg = "'value' must be a scalar, passed: {0}" raise TypeError(msg.format(type(value).__name__)) @@ -1445,7 +1475,7 @@ def _has_complex_internals(self): def _summary(self, name=None): """ - Return a summarized representation + Return a summarized representation. Parameters ---------- @@ -1476,7 +1506,8 @@ def _summary(self, name=None): def summary(self, name=None): """ - Return a summarized representation + Return a summarized representation. + .. deprecated:: 0.23.0 """ warnings.warn("'summary' is deprecated and will be removed in a " @@ -1493,13 +1524,15 @@ def _mpl_repr(self): # introspection @property def is_monotonic(self): - """ alias for is_monotonic_increasing (deprecated) """ + """ + Alias for is_monotonic_increasing. + """ return self.is_monotonic_increasing @property def is_monotonic_increasing(self): """ - return if the index is monotonic increasing (only equal or + Return if the index is monotonic increasing (only equal or increasing) values. Examples @@ -1516,7 +1549,7 @@ def is_monotonic_increasing(self): @property def is_monotonic_decreasing(self): """ - return if the index is monotonic decreasing (only equal or + Return if the index is monotonic decreasing (only equal or decreasing) values. Examples @@ -1532,8 +1565,9 @@ def is_monotonic_decreasing(self): @property def _is_strictly_monotonic_increasing(self): - """return if the index is strictly monotonic increasing - (only increasing) values + """ + Return if the index is strictly monotonic increasing + (only increasing) values. Examples -------- @@ -1548,8 +1582,9 @@ def _is_strictly_monotonic_increasing(self): @property def _is_strictly_monotonic_decreasing(self): - """return if the index is strictly monotonic decreasing - (only decreasing) values + """ + Return if the index is strictly monotonic decreasing + (only decreasing) values. Examples -------- @@ -1567,7 +1602,9 @@ def is_lexsorted_for_tuple(self, tup): @cache_readonly def is_unique(self): - """ return if the index has unique values """ + """ + Return if the index has unique values. + """ return self._engine.is_unique @property @@ -1855,7 +1892,9 @@ def _convert_list_indexer(self, keyarr, kind=None): return None def _invalid_indexer(self, form, key): - """ consistent invalid indexer message """ + """ + Consistent invalid indexer message. + """ raise TypeError("cannot do {form} indexing on {klass} with these " "indexers [{key}] of {kind}".format( form=form, klass=type(self), key=key, @@ -1958,11 +1997,15 @@ def _get_level_number(self, level): @cache_readonly def inferred_type(self): - """ return a string of the type inferred from the values """ + """ + Return a string of the type inferred from the values. 
+ """ return lib.infer_dtype(self) def _is_memory_usage_qualified(self): - """ return a boolean if we need a qualified .info display """ + """ + Return a boolean if we need a qualified .info display. + """ return self.is_object() def is_type_compatible(self, kind): @@ -1980,7 +2023,9 @@ def __reduce__(self): return _new_Index, (self.__class__, d), None def __setstate__(self, state): - """Necessary for making this object picklable""" + """ + Necessary for making this object picklable. + """ if isinstance(state, dict): self._data = state.pop('data') @@ -2014,7 +2059,7 @@ def __nonzero__(self): __bool__ = __nonzero__ _index_shared_docs['__contains__'] = """ - return a boolean if this key is IN the index + Return a boolean if this key is IN the index. Parameters ---------- @@ -2034,7 +2079,7 @@ def __contains__(self, key): return False _index_shared_docs['contains'] = """ - return a boolean if this key is IN the index + Return a boolean if this key is IN the index. Parameters ---------- @@ -2109,7 +2154,7 @@ def _can_hold_identifiers_and_holds_name(self, name): def append(self, other): """ - Append a collection of Index options together + Append a collection of Index options together. Parameters ---------- @@ -2146,13 +2191,13 @@ def _concat(self, to_concat, name): def _concat_same_dtype(self, to_concat, name): """ - Concatenate to_concat which has the same class + Concatenate to_concat which has the same class. """ # must be overridden in specific classes return _concat._concat_index_asobject(to_concat, name) _index_shared_docs['take'] = """ - return a new %(klass)s of the values selected by the indices + Return a new %(klass)s of the values selected by the indices. For internal compatibility with numpy arrays. @@ -2192,7 +2237,9 @@ def take(self, indices, axis=0, allow_fill=True, def _assert_take_fillable(self, values, indices, allow_fill=True, fill_value=None, na_value=np.nan): - """ Internal method to handle NA filling of take """ + """ + Internal method to handle NA filling of take. + """ indices = ensure_platform_int(indices) # only fill if we are passing a non-None fill_value @@ -2211,7 +2258,9 @@ def _assert_take_fillable(self, values, indices, allow_fill=True, @cache_readonly def _isnan(self): - """ return if each value is nan""" + """ + Return if each value is NaN. + """ if self._can_hold_na: return isna(self) else: @@ -2345,7 +2394,7 @@ def notna(self): def putmask(self, mask, value): """ - return a new Index of the values set with the mask + Return a new Index of the values set with the mask. See Also -------- @@ -2364,7 +2413,7 @@ def putmask(self, mask, value): def format(self, name=False, formatter=None, **kwargs): """ - Render a string representation of the Index + Render a string representation of the Index. """ header = [] if name: @@ -2430,7 +2479,9 @@ def to_native_types(self, slicer=None, **kwargs): return values._format_native_types(**kwargs) def _format_native_types(self, na_rep='', quoting=None, **kwargs): - """ actually format my specific types """ + """ + Actually format specific types of the index. + """ mask = isna(self) if not self.is_object() and not quoting: values = np.asarray(self).astype(str) @@ -2461,8 +2512,9 @@ def equals(self, other): return False def identical(self, other): - """Similar to equals, but check that other comparable attributes are - also equal + """ + Similar to equals, but check that other comparable attributes are + also equal. 
""" return (self.equals(other) and all((getattr(self, c, None) == getattr(other, c, None) @@ -2536,12 +2588,36 @@ def asof(self, label): def asof_locs(self, where, mask): """ - where : array of timestamps - mask : array of booleans where data is not NA + Finds the locations (indices) of the labels from the index for + every entry in the `where` argument. + + As in the `asof` function, if the label (a particular entry in + `where`) is not in the index, the latest index label upto the + passed label is chosen and its index returned. + + If all of the labels in the index are later than a label in `where`, + -1 is returned. + + `mask` is used to ignore NA values in the index during calculation. + + Parameters + ---------- + where : Index + An Index consisting of an array of timestamps. + mask : array-like + Array of booleans denoting where values in the original + data are not NA. + + Returns + ------- + numpy.ndarray + An array of locations (indices) of the labels from the Index + which correspond to the return values of the `asof` function + for every element in `where`. """ locs = self.values[mask].searchsorted(where.values, side='right') - locs = np.where(locs > 0, locs - 1, 0) + result = np.arange(len(self))[mask].take(locs) first = mask.argmax() @@ -2609,8 +2685,7 @@ def sort(self, *args, **kwargs): def sortlevel(self, level=None, ascending=True, sort_remaining=None): """ - - For internal compatibility with with the Index API + For internal compatibility with with the Index API. Sort the Index. This is for compat with MultiIndex @@ -2980,6 +3055,7 @@ def difference(self, other, sort=True): def symmetric_difference(self, other, result_name=None): """ Compute the symmetric difference of two Index objects. + It's sorted if sorting is possible. Parameters @@ -3136,7 +3212,7 @@ def get_loc(self, key, method=None, tolerance=None): def get_value(self, series, key): """ Fast lookup of value from 1-dimensional ndarray. Only use this if you - know what you're doing + know what you're doing. """ # if we have something that is Index-like, then @@ -3190,8 +3266,11 @@ def get_value(self, series, key): def set_value(self, arr, key, value): """ - Fast lookup of value from 1-dimensional ndarray. Only use this if you - know what you're doing + Fast lookup of value from 1-dimensional ndarray. + + Notes + ----- + Only use this if you know what you're doing. """ self._engine.set_value(com.values_from_object(arr), com.values_from_object(key), value) @@ -3240,8 +3319,10 @@ def _get_level_values(self, level): def droplevel(self, level=0): """ - Return index with requested level(s) removed. If resulting index has - only 1 level left, the result will be of Index type, not MultiIndex. + Return index with requested level(s) removed. + + If resulting index has only 1 level left, the result will be + of Index type, not MultiIndex. .. versionadded:: 0.23.1 (support for non-MultiIndex) @@ -3408,7 +3489,7 @@ def _get_fill_indexer(self, target, method, limit=None, tolerance=None): def _get_fill_indexer_searchsorted(self, target, method, limit=None): """ Fallback pad/backfill get_indexer that works for monotonic decreasing - indexes and non-monotonic targets + indexes and non-monotonic targets. 
""" if limit is not None: raise ValueError('limit argument for %r method only well-defined ' @@ -3501,8 +3582,10 @@ def get_indexer_non_unique(self, target): def get_indexer_for(self, target, **kwargs): """ - guaranteed return of an indexer even when non-unique - This dispatches to get_indexer or get_indexer_nonunique as appropriate + Guaranteed return of an indexer even when non-unique. + + This dispatches to get_indexer or get_indexer_nonunique + as appropriate. """ if self.is_unique: return self.get_indexer(target, **kwargs) @@ -3681,9 +3764,7 @@ def isin(self, values, level=None): def _can_reindex(self, indexer): """ - *this is an internal non-public method* - - Check if we are allowing reindexing with this particular indexer + Check if we are allowing reindexing with this particular indexer. Parameters ---------- @@ -3701,7 +3782,8 @@ def _can_reindex(self, indexer): def reindex(self, target, method=None, level=None, limit=None, tolerance=None): """ - Create index with target's values (move/add/delete values as necessary) + Create index with target's values (move/add/delete values + as necessary). Parameters ---------- @@ -3758,10 +3840,8 @@ def reindex(self, target, method=None, level=None, limit=None, def _reindex_non_unique(self, target): """ - *this is an internal non-public method* - Create a new index with target's values (move/add/delete values as - necessary) use with non-unique Index and a possibly non-unique target + necessary) use with non-unique Index and a possibly non-unique target. Parameters ---------- @@ -3818,8 +3898,6 @@ def _reindex_non_unique(self, target): return new_index, indexer, new_indexer _index_shared_docs['join'] = """ - *this is an internal non-public method* - Compute join_index and indexers to conform data structures to the new index. @@ -4035,16 +4113,18 @@ def _join_level(self, other, level, how='left', return_indexers=False, """ The join method *only* affects the level of the resulting MultiIndex. Otherwise it just exactly aligns the Index data to the - labels of the level in the MultiIndex. If `keep_order` == True, the - order of the data indexed by the MultiIndex will not be changed; - otherwise, it will tie out with `other`. + labels of the level in the MultiIndex. + + If ```keep_order == True```, the order of the data indexed by the + MultiIndex will not be changed; otherwise, it will tie out + with `other`. """ from .multi import MultiIndex def _get_leaf_sorter(labels): """ - returns sorter for the inner most level while preserving the - order of higher levels + Returns sorter for the inner most level while preserving the + order of higher levels. """ if labels[0].size == 0: return np.empty(0, dtype='int64') @@ -4266,8 +4346,8 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): def _maybe_cast_indexer(self, key): """ - If we have a float key and are not a floating index - then try to cast to an int if equivalent + If we have a float key and are not a floating index, then try to cast + to an int if equivalent. """ if is_float(key) and not self.is_floating(): @@ -4281,9 +4361,8 @@ def _maybe_cast_indexer(self, key): def _validate_indexer(self, form, key, kind): """ - if we are positional indexer - validate that we have appropriate typed bounds - must be an integer + If we are positional indexer, validate that we have appropriate + typed bounds must be an integer. 
""" assert kind in ['ix', 'loc', 'getitem', 'iloc'] @@ -4493,7 +4572,7 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): def delete(self, loc): """ - Make new Index with passed location(-s) deleted + Make new Index with passed location(-s) deleted. Returns ------- @@ -4503,8 +4582,9 @@ def delete(self, loc): def insert(self, loc, item): """ - Make new Index inserting new item at location. Follows - Python list.append semantics for negative values + Make new Index inserting new item at location. + + Follows Python list.append semantics for negative values. Parameters ---------- @@ -4522,7 +4602,7 @@ def insert(self, loc, item): def drop(self, labels, errors='raise'): """ - Make new Index with passed list of labels deleted + Make new Index with passed list of labels deleted. Parameters ---------- @@ -4762,7 +4842,9 @@ def _evaluate_with_datetime_like(self, other, op): @classmethod def _add_comparison_methods(cls): - """ add in comparison methods """ + """ + Add in comparison methods. + """ cls.__eq__ = _make_comparison_op(operator.eq, cls) cls.__ne__ = _make_comparison_op(operator.ne, cls) cls.__lt__ = _make_comparison_op(operator.lt, cls) @@ -4772,7 +4854,9 @@ def _add_comparison_methods(cls): @classmethod def _add_numeric_methods_add_sub_disabled(cls): - """ add in the numeric add/sub methods to disable """ + """ + Add in the numeric add/sub methods to disable. + """ cls.__add__ = make_invalid_op('__add__') cls.__radd__ = make_invalid_op('__radd__') cls.__iadd__ = make_invalid_op('__iadd__') @@ -4782,7 +4866,9 @@ def _add_numeric_methods_add_sub_disabled(cls): @classmethod def _add_numeric_methods_disabled(cls): - """ add in numeric methods to disable other than add/sub """ + """ + Add in numeric methods to disable other than add/sub. + """ cls.__pow__ = make_invalid_op('__pow__') cls.__rpow__ = make_invalid_op('__rpow__') cls.__mul__ = make_invalid_op('__mul__') @@ -4802,12 +4888,15 @@ def _add_numeric_methods_disabled(cls): cls.__inv__ = make_invalid_op('__inv__') def _maybe_update_attributes(self, attrs): - """ Update Index attributes (e.g. freq) depending on op """ + """ + Update Index attributes (e.g. freq) depending on op. + """ return attrs def _validate_for_numeric_unaryop(self, op, opstr): - """ validate if we can perform a numeric unary operation """ - + """ + Validate if we can perform a numeric unary operation. + """ if not self._is_numeric_dtype: raise TypeError("cannot evaluate a numeric op " "{opstr} for type: {typ}" @@ -4815,10 +4904,12 @@ def _validate_for_numeric_unaryop(self, op, opstr): def _validate_for_numeric_binop(self, other, op): """ - return valid other, evaluate or raise TypeError - if we are not of the appropriate type + Return valid other; evaluate or raise TypeError if we are not of + the appropriate type. - internal method called by ops + Notes + ----- + This is an internal method called by ops. """ opstr = '__{opname}__'.format(opname=op.__name__) # if we are an inheritor of numeric, @@ -4858,7 +4949,9 @@ def _validate_for_numeric_binop(self, other, op): @classmethod def _add_numeric_methods_binary(cls): - """ add in numeric methods """ + """ + Add in numeric methods. + """ cls.__add__ = _make_arithmetic_op(operator.add, cls) cls.__radd__ = _make_arithmetic_op(ops.radd, cls) cls.__sub__ = _make_arithmetic_op(operator.sub, cls) @@ -4880,8 +4973,9 @@ def _add_numeric_methods_binary(cls): @classmethod def _add_numeric_methods_unary(cls): - """ add in numeric unary methods """ - + """ + Add in numeric unary methods. 
+ """ def _make_evaluate_unary(op, opstr): def _evaluate_numeric_unary(self): @@ -4905,8 +4999,9 @@ def _add_numeric_methods(cls): @classmethod def _add_logical_methods(cls): - """ add in logical methods """ - + """ + Add in logical methods. + """ _doc = """ %(desc)s @@ -5010,7 +5105,9 @@ def logical_func(self, *args, **kwargs): @classmethod def _add_logical_methods_disabled(cls): - """ add in logical methods to disable """ + """ + Add in logical methods to disable. + """ cls.all = make_invalid_op('all') cls.any = make_invalid_op('any') @@ -5021,7 +5118,8 @@ def _add_logical_methods_disabled(cls): def ensure_index_from_sequences(sequences, names=None): - """Construct an index from sequences of data. + """ + Construct an index from sequences of data. A single sequence returns an Index. Many sequences returns a MultiIndex. @@ -5062,7 +5160,7 @@ def ensure_index_from_sequences(sequences, names=None): def ensure_index(index_like, copy=False): """ - Ensure that we have an index from some index-like object + Ensure that we have an index from some index-like object. Parameters ---------- @@ -5124,7 +5222,9 @@ def ensure_index(index_like, copy=False): def _ensure_has_len(seq): - """If seq is an iterator, put its values into a list.""" + """ + If seq is an iterator, put its values into a list. + """ try: len(seq) except TypeError: @@ -5135,7 +5235,7 @@ def _ensure_has_len(seq): def _trim_front(strings): """ - Trims zeros and decimal points + Trims zeros and decimal points. """ trimmed = strings while len(strings) > 0 and all(x[0] == ' ' for x in trimmed): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1179f6f39d06c9..0e2f7ceb24e94c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -34,7 +34,9 @@ class DatelikeOps(object): - """ common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex """ + """ + Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex. + """ def strftime(self, date_format): return Index(self.format(date_format=date_format), @@ -76,7 +78,9 @@ def strftime(self, date_format): class TimelikeOps(object): - """ common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex """ + """ + Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex. + """ _round_doc = ( """ @@ -257,7 +261,9 @@ def equals(self, other): @staticmethod def _join_i8_wrapper(joinf, dtype, with_indexers=True): - """ create the join wrapper methods """ + """ + Create the join wrapper methods. + """ @staticmethod def wrapper(left, right): @@ -287,7 +293,7 @@ def _evaluate_compare(self, other, op): def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise', from_utc=False): """ - ensure that we are re-localized + Ensure that we are re-localized. This is for compat as we can then call this on all datetimelike indexes generally (ignored for Period/Timedelta) @@ -320,7 +326,7 @@ def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise', def _box_values_as_index(self): """ - return object Index which contains boxed values + Return object Index which contains boxed values. """ from pandas.core.index import Index return Index(self._box_values(self.asi8), name=self.name, dtype=object) @@ -357,7 +363,7 @@ def map(self, f): def sort_values(self, return_indexer=False, ascending=True): """ - Return sorted copy of Index + Return sorted copy of Index. 
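A brief doctest would help here as well; a sketch, assuming the usual `DatetimeIndex` repr of this era:

+
+        Examples
+        --------
+        >>> idx = pd.DatetimeIndex(['2018-01-03', '2018-01-01'])
+        >>> idx.sort_values()
+        DatetimeIndex(['2018-01-01', '2018-01-03'], dtype='datetime64[ns]', freq=None)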
""" if return_indexer: _as = self.argsort() @@ -411,7 +417,8 @@ def take(self, indices, axis=0, allow_fill=True, @property def asobject(self): - """Return object Index which contains boxed values. + """ + Return object Index which contains boxed values. .. deprecated:: 0.23.0 Use ``astype(object)`` instead. @@ -431,7 +438,7 @@ def _convert_tolerance(self, tolerance, target): def tolist(self): """ - return a list of the underlying data + Return a list of the underlying data. """ return list(self.astype(object)) @@ -466,6 +473,7 @@ def min(self, axis=None, *args, **kwargs): def argmin(self, axis=None, *args, **kwargs): """ Returns the indices of the minimum values along an axis. + See `numpy.ndarray.argmin` for more information on the `axis` parameter. @@ -516,6 +524,7 @@ def max(self, axis=None, *args, **kwargs): def argmax(self, axis=None, *args, **kwargs): """ Returns the indices of the maximum values along an axis. + See `numpy.ndarray.argmax` for more information on the `axis` parameter. @@ -541,7 +550,7 @@ def _formatter_func(self): def _format_attrs(self): """ - Return a list of tuples of the (attr,formatted_value) + Return a list of tuples of the (attr,formatted_value). """ attrs = super(DatetimeIndexOpsMixin, self)._format_attrs() for attrib in self._attributes: @@ -554,8 +563,8 @@ def _format_attrs(self): def _convert_scalar_indexer(self, key, kind=None): """ - we don't allow integer or float indexing on datetime-like when using - loc + We don't allow integer or float indexing on datetime-like when using + loc. Parameters ---------- @@ -581,8 +590,8 @@ def _convert_scalar_indexer(self, key, kind=None): @classmethod def _add_datetimelike_methods(cls): """ - add in the datetimelike methods (as we may have to override the - superclass) + Add in the datetimelike methods (as we may have to override the + superclass). """ def __add__(self, other): @@ -613,7 +622,7 @@ def __rsub__(self, other): def isin(self, values): """ Compute boolean array of whether each index value is found in the - passed set of values + passed set of values. Parameters ---------- @@ -633,7 +642,7 @@ def isin(self, values): def repeat(self, repeats, *args, **kwargs): """ - Analogous to ndarray.repeat + Analogous to ndarray.repeat. """ nv.validate_repeat(args, kwargs) if is_period_dtype(self): @@ -654,7 +663,7 @@ def where(self, cond, other=None): def _summary(self, name=None): """ - Return a summarized representation + Return a summarized representation. Parameters ---------- @@ -685,7 +694,7 @@ def _summary(self, name=None): def _concat_same_dtype(self, to_concat, name): """ - Concatenate to_concat which has the same class + Concatenate to_concat which has the same class. """ attribs = self._get_attributes_dict() attribs['name'] = name @@ -742,7 +751,7 @@ def _time_shift(self, periods, freq=None): def _ensure_datetimelike_to_i8(other, to_utc=False): """ - helper for coercing an input scalar or array to i8 + Helper for coercing an input scalar or array to i8. Parameters ---------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f9483b48b5261c..ea6dfa6a3a6aff 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -121,7 +121,7 @@ def _codes_to_ints(self, codes): class MultiIndex(Index): """ - A multi-level, or hierarchical, index object for pandas objects + A multi-level, or hierarchical, index object for pandas objects. 
Parameters ---------- @@ -1195,14 +1195,14 @@ def to_frame(self, index=True, name=None): def to_hierarchical(self, n_repeat, n_shuffle=1): """ - .. deprecated:: 0.24.0 - Return a MultiIndex reshaped to conform to the shapes given by n_repeat and n_shuffle. Useful to replicate and rearrange a MultiIndex for combination with another Index with n_repeat items. + .. deprecated:: 0.24.0 + Parameters ---------- n_repeat : int diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0d4e7aaebeca5f..d6286244fcb7ec 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -122,7 +122,7 @@ def ensure_int(value, field): @classmethod def from_range(cls, data, name=None, dtype=None, **kwargs): - """ create RangeIndex from a range (py3), or xrange (py2) object """ + """ Create RangeIndex from a range (py3), or xrange (py2) object. """ if not isinstance(data, range): raise TypeError( '{0}(...) must be called with object coercible to a ' diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 90016f599addc5..b976dc27a69f70 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -48,7 +48,7 @@ def _ensure_like_indices(time, panels): """ - Makes sure that time and panels are conformable + Makes sure that time and panels are conformable. """ n_time = len(time) n_panel = len(panels) @@ -63,7 +63,7 @@ def _ensure_like_indices(time, panels): def panel_index(time, panels, names=None): """ - Returns a multi-index suitable for a panel-like DataFrame + Returns a multi-index suitable for a panel-like DataFrame. Parameters ---------- @@ -157,7 +157,7 @@ def __init__(self, data=None, items=None, major_axis=None, minor_axis=None, def _init_data(self, data, copy, dtype, **kwargs): """ Generate ND initialization; axes are passed - as required objects to __init__ + as required objects to __init__. """ if data is None: data = {} @@ -242,7 +242,7 @@ def _init_arrays(self, arrays, arr_names, axes): @classmethod def from_dict(cls, data, intersect=False, orient='items', dtype=None): """ - Construct Panel from dict of DataFrame objects + Construct Panel from dict of DataFrame objects. Parameters ---------- @@ -348,7 +348,7 @@ def _compare_constructor(self, other, func): def __unicode__(self): """ - Return a string representation for a particular Panel + Return a string representation for a particular Panel. Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. @@ -377,7 +377,7 @@ def _get_plane_axes_index(self, axis): """ Get my plane axes indexes: these are already (as compared with higher level planes), - as we are returning a DataFrame axes indexes + as we are returning a DataFrame axes indexes. """ axis_name = self._get_axis_name(axis) @@ -397,7 +397,7 @@ def _get_plane_axes(self, axis): """ Get my plane axes indexes: these are already (as compared with higher level planes), - as we are returning a DataFrame axes + as we are returning a DataFrame axes. """ return [self._get_axis(axi) for axi in self._get_plane_axes_index(axis)] @@ -409,14 +409,14 @@ def to_sparse(self, *args, **kwargs): NOT IMPLEMENTED: do not call this method, as sparsifying is not supported for Panel objects and will raise an error. - Convert to SparsePanel + Convert to SparsePanel. """ raise NotImplementedError("sparsifying is not supported " "for Panel objects") def to_excel(self, path, na_rep='', engine=None, **kwargs): """ - Write each DataFrame in Panel to a separate excel sheet + Write each DataFrame in Panel to a separate excel sheet. 
Parameters ---------- @@ -473,7 +473,8 @@ def as_matrix(self): # Getting and setting elements def get_value(self, *args, **kwargs): - """Quickly retrieve single value at (item, major, minor) location + """ + Quickly retrieve single value at (item, major, minor) location. .. deprecated:: 0.21.0 @@ -520,7 +521,8 @@ def _get_value(self, *args, **kwargs): _get_value.__doc__ = get_value.__doc__ def set_value(self, *args, **kwargs): - """Quickly set single value at (item, major, minor) location + """ + Quickly set single value at (item, major, minor) location. .. deprecated:: 0.21.0 @@ -619,7 +621,9 @@ def __setitem__(self, key, value): NDFrame._set_item(self, key, mat) def _unpickle_panel_compat(self, state): # pragma: no cover - "Unpickle the panel" + """ + Unpickle the panel. + """ from pandas.io.pickle import _unpickle_array _unpickle = _unpickle_array @@ -687,7 +691,9 @@ def round(self, decimals=0, *args, **kwargs): raise TypeError("decimals must be an integer") def _needs_reindex_multi(self, axes, method, level): - """ don't allow a multi reindex on Panel or above ndim """ + """ + Don't allow a multi reindex on Panel or above ndim. + """ return False def align(self, other, **kwargs): @@ -695,7 +701,7 @@ def align(self, other, **kwargs): def dropna(self, axis=0, how='any', inplace=False): """ - Drop 2D from panel, holding passed axis constant + Drop 2D from panel, holding passed axis constant. Parameters ---------- @@ -787,7 +793,7 @@ def _combine_panel(self, other, func): def major_xs(self, key): """ - Return slice of panel along major axis + Return slice of panel along major axis. Parameters ---------- @@ -811,7 +817,7 @@ def major_xs(self, key): def minor_xs(self, key): """ - Return slice of panel along minor axis + Return slice of panel along minor axis. Parameters ---------- @@ -835,7 +841,7 @@ def minor_xs(self, key): def xs(self, key, axis=1): """ - Return slice of panel along selected axis + Return slice of panel along selected axis. Parameters ---------- @@ -871,6 +877,8 @@ def xs(self, key, axis=1): def _ixs(self, i, axis=0): """ + Parameters + ---------- i : int, slice, or sequence of integers axis : int """ @@ -898,7 +906,7 @@ def _ixs(self, i, axis=0): def groupby(self, function, axis='major'): """ - Group data on given axis, returning GroupBy object + Group data on given axis, returning GroupBy object. Parameters ---------- @@ -993,7 +1001,7 @@ def construct_index_parts(idx, major=True): def apply(self, func, axis='major', **kwargs): """ - Applies function along axis (or axes) of the Panel + Applies function along axis (or axes) of the Panel. Parameters ---------- @@ -1115,8 +1123,9 @@ def _apply_1d(self, func, axis): return self._construct_return_type(results, planes) def _apply_2d(self, func, axis): - """ handle 2-d slices, equiv to iterating over the other axis """ - + """ + Handle 2-d slices, equiv to iterating over the other axis. + """ ndim = self.ndim axis = [self._get_axis_number(a) for a in axis] @@ -1172,7 +1181,9 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, return self._construct_return_type(result, axes) def _construct_return_type(self, result, axes=None): - """ return the type for the ndim of the result """ + """ + Return the type for the ndim of the result. + """ ndim = getattr(result, 'ndim', None) # need to assume they are the same @@ -1308,6 +1319,7 @@ def count(self, axis='major'): def shift(self, periods=1, freq=None, axis='major'): """ Shift index by desired number of periods with an optional time freq. 
+
         The shifted data will not include the dropped periods and the
         shifted axis will be smaller than the original. This is different
         from the behavior of DataFrame.shift()

@@ -1333,7 +1345,7 @@ def tshift(self, periods=1, freq=None, axis='major'):

     def join(self, other, how='left', lsuffix='', rsuffix=''):
         """
-        Join items with other Panel either on major and minor axes column
+        Join items with other Panel either on major and minor axes column.

         Parameters
         ----------
@@ -1440,13 +1452,17 @@ def _get_join_index(self, other, how):
     # miscellaneous data creation
     @staticmethod
     def _extract_axes(self, data, axes, **kwargs):
-        """ return a list of the axis indices """
+        """
+        Return a list of the axis indices.
+        """
         return [self._extract_axis(self, data, axis=i, **kwargs)
                 for i, a in enumerate(axes)]

     @staticmethod
     def _extract_axes_for_slice(self, axes):
-        """ return the slice dictionary for these axes """
+        """
+        Return the slice dictionary for these axes.
+        """
         return {self._AXIS_SLICEMAP[i]: a for i, a
                 in zip(self._AXIS_ORDERS[self._AXIS_LEN - len(axes):], axes)}
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index 125b441e5558a5..1e3fa166a9b4c6 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -81,7 +81,9 @@ def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs):
         self.groupby._set_grouper(self._convert_obj(obj), sort=True)

     def __unicode__(self):
-        """ provide a nice str repr of our rolling object """
+        """
+        Provide a nice str repr of our rolling object.
+        """
         attrs = ["{k}={v}".format(k=k, v=getattr(self.groupby, k))
                  for k in self._attributes if
                  getattr(self.groupby, k, None) is not None]
@@ -100,7 +102,7 @@ def __getattr__(self, attr):

     def __iter__(self):
         """
-        Resampler iterator
+        Resampler iterator.

         Returns
         -------
@@ -124,14 +126,18 @@ def ax(self):

     @property
     def _typ(self):
-        """ masquerade for compat as a Series or a DataFrame """
+        """
+        Masquerade for compat as a Series or a DataFrame.
+        """
         if isinstance(self._selected_obj, pd.Series):
             return 'series'
         return 'dataframe'

     @property
     def _from_selection(self):
-        """ is the resampling from a DataFrame column or MultiIndex level """
+        """
+        Is the resampling from a DataFrame column or MultiIndex level.
+        """
         # upsampling and PeriodIndex resampling do not work
         # with selection, this state used to catch and raise an error
         return (self.groupby is not None and
@@ -140,7 +146,7 @@ def _from_selection(self):

     def _convert_obj(self, obj):
         """
-        provide any conversions for the object in order to correctly handle
+        Provide any conversions for the object so it is handled correctly.

         Parameters
         ----------
@@ -158,17 +164,17 @@ def _get_binner_for_time(self):

     def _set_binner(self):
         """
-        setup our binners
-        cache these as we are an immutable object
-        """
+        Set up our binners.
+
+        Cache these as we are an immutable object.
+        """
         if self.binner is None:
             self.binner, self.grouper = self._get_binner()

     def _get_binner(self):
         """
-        create the BinGrouper, assume that self.set_grouper(obj)
-        has already been called
+        Create the BinGrouper, assuming that self.set_grouper(obj)
+        has already been called.
         """
         binner, bins, binlabels = self._get_binner_for_time()
@@ -176,28 +182,31 @@ def _get_binner(self):
         return binner, bin_grouper

     def _assure_grouper(self):
-        """ make sure that we are creating our binner & grouper """
+        """
+        Make sure that we are creating our binner & grouper.
+        """
         self._set_binner()

     @Substitution(klass='Resampler',
                   versionadded='..
versionadded:: 0.23.0', examples=""" ->>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, -... index=pd.date_range('2012-08-02', periods=4)) ->>> df - A -2012-08-02 1 -2012-08-03 2 -2012-08-04 3 -2012-08-05 4 - -To get the difference between each 2-day period's maximum and minimum value in -one pass, you can do - ->>> df.resample('2D').pipe(lambda x: x.max() - x.min()) - A -2012-08-02 1 -2012-08-04 1""") + >>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, + ... index=pd.date_range('2012-08-02', periods=4)) + >>> df + A + 2012-08-02 1 + 2012-08-03 2 + 2012-08-04 3 + 2012-08-05 4 + + To get the difference between each 2-day period's maximum and minimum + value in one pass, you can do + + >>> df.resample('2D').pipe(lambda x: x.max() - x.min()) + A + 2012-08-02 1 + 2012-08-04 1 + """) @Appender(_pipe_template) def pipe(self, func, *args, **kwargs): return super(Resampler, self).pipe(func, *args, **kwargs) @@ -270,7 +279,7 @@ def aggregate(self, func, *args, **kwargs): def transform(self, arg, *args, **kwargs): """ Call function producing a like-indexed Series on each group and return - a Series with the transformed values + a Series with the transformed values. Parameters ---------- @@ -296,8 +305,7 @@ def _upsample(self, f, limit=None, fill_value=None): def _gotitem(self, key, ndim, subset=None): """ - sub-classes to define - return a sliced object + Sub-classes to define. Return a sliced object. Parameters ---------- @@ -320,7 +328,9 @@ def _gotitem(self, key, ndim, subset=None): return grouped def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): - """ re-evaluate the obj with a groupby aggregation """ + """ + Re-evaluate the obj with a groupby aggregation. + """ if grouper is None: self._set_binner() @@ -352,7 +362,7 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): def _apply_loffset(self, result): """ - if loffset is set, offset the result index + If loffset is set, offset the result index. This is NOT an idempotent routine, it will be applied exactly once to the result. @@ -377,11 +387,15 @@ def _apply_loffset(self, result): return result def _get_resampler_for_grouping(self, groupby, **kwargs): - """ return the correct class for resampling with groupby """ + """ + Return the correct class for resampling with groupby. + """ return self._resampler_for_grouping(self, groupby=groupby, **kwargs) def _wrap_result(self, result): - """ potentially wrap any results """ + """ + Potentially wrap any results. + """ if isinstance(result, ABCSeries) and self._selection is not None: result.name = self._selection @@ -394,7 +408,7 @@ def _wrap_result(self, result): def pad(self, limit=None): """ - Forward fill the values + Forward fill the values. Parameters ---------- @@ -757,8 +771,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, def asfreq(self, fill_value=None): """ - return the values at the new freq, - essentially a reindex + Return the values at the new freq, essentially a reindex. Parameters ---------- @@ -777,7 +790,7 @@ def asfreq(self, fill_value=None): def std(self, ddof=1, *args, **kwargs): """ - Compute standard deviation of groups, excluding missing values + Compute standard deviation of groups, excluding missing values. Parameters ---------- @@ -789,12 +802,12 @@ def std(self, ddof=1, *args, **kwargs): def var(self, ddof=1, *args, **kwargs): """ - Compute variance of groups, excluding missing values + Compute variance of groups, excluding missing values. 
Parameters ---------- ddof : integer, default 1 - degrees of freedom + degrees of freedom """ nv.validate_resampler_func('var', args, kwargs) return self._downsample('var', ddof=ddof) @@ -863,8 +876,10 @@ def f(self, _method=method): def _maybe_process_deprecations(r, how=None, fill_method=None, limit=None): - """ potentially we might have a deprecation warning, show it - but call the appropriate methods anyhow """ + """ + Potentially we might have a deprecation warning, show it + but call the appropriate methods anyhow. + """ if how is not None: @@ -909,8 +924,9 @@ def _maybe_process_deprecations(r, how=None, fill_method=None, limit=None): class _GroupByMixin(GroupByMixin): - """ provide the groupby facilities """ - + """ + Provide the groupby facilities. + """ def __init__(self, obj, *args, **kwargs): parent = kwargs.pop('parent', None) @@ -931,8 +947,8 @@ def __init__(self, obj, *args, **kwargs): def _apply(self, f, grouper=None, *args, **kwargs): """ - dispatch to _upsample; we are stripping all of the _upsample kwargs and - performing the original function call on the grouped object + Dispatch to _upsample; we are stripping all of the _upsample kwargs and + performing the original function call on the grouped object. """ def func(x): @@ -966,7 +982,7 @@ def _get_binner_for_time(self): def _downsample(self, how, **kwargs): """ - Downsample the cython defined function + Downsample the cython defined function. Parameters ---------- @@ -1003,6 +1019,7 @@ def _downsample(self, how, **kwargs): def _adjust_binner_for_upsample(self, binner): """ Adjust our binner when upsampling. + The range of a new index should not be outside specified range """ if self.closed == 'right': @@ -1013,6 +1030,8 @@ def _adjust_binner_for_upsample(self, binner): def _upsample(self, method, limit=None, fill_value=None): """ + Parameters + ---------- method : string {'backfill', 'bfill', 'pad', 'ffill', 'asfreq'} method for upsampling limit : int, default None @@ -1065,7 +1084,6 @@ class DatetimeIndexResamplerGroupby(_GroupByMixin, DatetimeIndexResampler): Provides a resample of a groupby implementation .. versionadded:: 0.18.1 - """ @property def _constructor(self): @@ -1106,7 +1124,7 @@ def _convert_obj(self, obj): def _downsample(self, how, **kwargs): """ - Downsample the cython defined function + Downsample the cython defined function. Parameters ---------- @@ -1143,6 +1161,8 @@ def _downsample(self, how, **kwargs): def _upsample(self, method, limit=None, fill_value=None): """ + Parameters + ---------- method : string {'backfill', 'bfill', 'pad', 'ffill'} method for upsampling limit : int, default None @@ -1177,10 +1197,9 @@ def _upsample(self, method, limit=None, fill_value=None): class PeriodIndexResamplerGroupby(_GroupByMixin, PeriodIndexResampler): """ - Provides a resample of a groupby implementation + Provides a resample of a groupby implementation. .. versionadded:: 0.18.1 - """ @property def _constructor(self): @@ -1199,6 +1218,7 @@ def _get_binner_for_time(self): def _adjust_binner_for_upsample(self, binner): """ Adjust our binner when upsampling. + The range of a new index is allowed to be greater than original range so we don't need to change the length of a binner, GH 13022 """ @@ -1207,10 +1227,9 @@ def _adjust_binner_for_upsample(self, binner): class TimedeltaIndexResamplerGroupby(_GroupByMixin, TimedeltaIndexResampler): """ - Provides a resample of a groupby implementation + Provides a resample of a groupby implementation. .. 
versionadded:: 0.18.1 - """ @property def _constructor(self): @@ -1218,7 +1237,9 @@ def _constructor(self): def resample(obj, kind=None, **kwds): - """ create a TimeGrouper and return our resampler """ + """ + Create a TimeGrouper and return our resampler. + """ tg = TimeGrouper(**kwds) return tg._get_resampler(obj, kind=kind) @@ -1228,7 +1249,9 @@ def resample(obj, kind=None, **kwds): def get_resampler_for_grouping(groupby, rule, how=None, fill_method=None, limit=None, kind=None, **kwargs): - """ return our appropriate resampler when grouping as well """ + """ + Return our appropriate resampler when grouping as well. + """ # .resample uses 'on' similar to how .groupby uses 'key' kwargs['key'] = kwargs.pop('on', None) @@ -1244,7 +1267,7 @@ def get_resampler_for_grouping(groupby, rule, how=None, fill_method=None, class TimeGrouper(Grouper): """ - Custom groupby class for time-interval grouping + Custom groupby class for time-interval grouping. Parameters ---------- @@ -1311,7 +1334,7 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean', def _get_resampler(self, obj, kind=None): """ - return my resampler or raise if we have an invalid axis + Return my resampler or raise if we have an invalid axis. Parameters ---------- @@ -1643,7 +1666,7 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None): """ - Utility frequency conversion method for Series/DataFrame + Utility frequency conversion method for Series/DataFrame. """ if isinstance(obj.index, PeriodIndex): if method is not None: diff --git a/pandas/core/series.py b/pandas/core/series.py index c9b1a2c45eab38..4b8274a9e8333c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -75,7 +75,8 @@ # see gh-16971 def remove_na(arr): - """Remove null values from array like structure. + """ + Remove null values from array like structure. .. deprecated:: 0.21.0 Use s[s.notnull()] instead. @@ -87,7 +88,9 @@ def remove_na(arr): def _coerce_method(converter): - """ install the scalar coercion methods """ + """ + Install the scalar coercion methods. + """ def wrapper(self): if len(self) == 1: @@ -310,7 +313,8 @@ def _init_dict(self, data, index=None, dtype=None): @classmethod def from_array(cls, arr, index=None, name=None, dtype=None, copy=False, fastpath=False): - """Construct Series from array. + """ + Construct Series from array. .. deprecated :: 0.23.0 Use pd.Series(..) constructor instead. @@ -341,7 +345,9 @@ def _can_hold_na(self): _index = None def _set_axis(self, axis, labels, fastpath=False): - """ override generic, we want to set the _typ here """ + """ + Override generic, we want to set the _typ here. + """ if not fastpath: labels = ensure_index(labels) @@ -418,8 +424,7 @@ def ftypes(self): @property def values(self): """ - Return Series as ndarray or ndarray-like - depending on the dtype + Return Series as ndarray or ndarray-like depending on the dtype. Returns ------- @@ -455,8 +460,9 @@ def _values(self): return self._data.internal_values() def _formatting_values(self): - """Return the values that can be formatted (used by SeriesFormatter - and DataFrameFormatter) + """ + Return the values that can be formatted (used by SeriesFormatter + and DataFrameFormatter). """ return self._data.formatting_values() @@ -468,7 +474,8 @@ def get_values(self): @property def asobject(self): - """Return object Series which contains boxed values. + """ + Return object Series which contains boxed values. .. 
deprecated :: 0.23.0 @@ -483,7 +490,7 @@ def asobject(self): # ops def ravel(self, order='C'): """ - Return the flattened underlying data as an ndarray + Return the flattened underlying data as an ndarray. See Also -------- @@ -493,7 +500,7 @@ def ravel(self, order='C'): def compress(self, condition, *args, **kwargs): """ - Return selected slices of an array along given axis as a Series + Return selected slices of an array along given axis as a Series. .. deprecated:: 0.24.0 @@ -510,7 +517,7 @@ def compress(self, condition, *args, **kwargs): def nonzero(self): """ - Return the *integer* indices of the elements that are non-zero + Return the *integer* indices of the elements that are non-zero. This method is equivalent to calling `numpy.nonzero` on the series data. For compatibility with NumPy, the return value is @@ -545,8 +552,7 @@ def nonzero(self): def put(self, *args, **kwargs): """ - Applies the `put` method to its `values` attribute - if it has one. + Applies the `put` method to its `values` attribute if it has one. See Also -------- @@ -556,7 +562,7 @@ def put(self, *args, **kwargs): def __len__(self): """ - return the length of the Series + Return the length of the Series. """ return len(self._data) @@ -631,20 +637,20 @@ def view(self, dtype=None): def __array__(self, result=None): """ - the array interface, return my values + The array interface, return my values. """ return self.get_values() def __array_wrap__(self, result, context=None): """ - Gets called after a ufunc + Gets called after a ufunc. """ return self._constructor(result, index=self.index, copy=False).__finalize__(self) def __array_prepare__(self, result, context=None): """ - Gets called prior to a ufunc + Gets called prior to a ufunc. """ # nice error message for non-ufunc types @@ -713,12 +719,14 @@ def _unpickle_series_compat(self, state): # indexers @property def axes(self): - """Return a list of the row axis labels""" + """ + Return a list of the row axis labels. + """ return [self.index] def _ixs(self, i, axis=0): """ - Return the i-th value or values in the Series by location + Return the i-th value or values in the Series by location. Parameters ---------- @@ -1013,7 +1021,8 @@ def repeat(self, repeats, *args, **kwargs): index=new_index).__finalize__(self) def get_value(self, label, takeable=False): - """Quickly retrieve single value at passed index label + """ + Quickly retrieve single value at passed index label. .. deprecated:: 0.21.0 Please use .at[] or .iat[] accessors. @@ -1040,9 +1049,11 @@ def _get_value(self, label, takeable=False): _get_value.__doc__ = get_value.__doc__ def set_value(self, label, value, takeable=False): - """Quickly set single value at passed label. If label is not contained, - a new object is created with the label placed at the end of the result - index. + """ + Quickly set single value at passed label. + + If label is not contained, a new object is created with the label + placed at the end of the result index. .. deprecated:: 0.21.0 Please use .at[] or .iat[] accessors. @@ -1215,7 +1226,7 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): def __unicode__(self): """ - Return a string representation for a particular DataFrame + Return a string representation for a particular DataFrame. Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. 
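A hypothetical doctest (default display options assumed) may anchor what this string representation looks like:

>>> s = pd.Series([1, 2, 3])
>>> print(s.to_string())
0    1
1    2
2    3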
@@ -1236,7 +1247,7 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, index=True, length=False, dtype=False, name=False, max_rows=None): """ - Render a string representation of the Series + Render a string representation of the Series. Parameters ---------- @@ -1290,7 +1301,7 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, def iteritems(self): """ - Lazily iterate over (index, value) tuples + Lazily iterate over (index, value) tuples. """ return zip(iter(self.index), iter(self)) @@ -1300,7 +1311,9 @@ def iteritems(self): # Misc public methods def keys(self): - """Alias for index""" + """ + Alias for index. + """ return self.index def to_dict(self, into=dict): @@ -1339,7 +1352,7 @@ def to_dict(self, into=dict): def to_frame(self, name=None): """ - Convert Series to DataFrame + Convert Series to DataFrame. Parameters ---------- @@ -1360,7 +1373,7 @@ def to_frame(self, name=None): def to_sparse(self, kind='block', fill_value=None): """ - Convert Series to SparseSeries + Convert Series to SparseSeries. Parameters ---------- @@ -1401,7 +1414,7 @@ def _set_name(self, name, inplace=False): def count(self, level=None): """ - Return number of non-NA/null observations in the Series + Return number of non-NA/null observations in the Series. Parameters ---------- @@ -1433,7 +1446,8 @@ def count(self, level=None): dtype='int64').__finalize__(self) def mode(self, dropna=True): - """Return the mode(s) of the dataset. + """ + Return the mode(s) of the dataset. Always returns Series even if only one value is returned. @@ -1796,7 +1810,7 @@ def idxmax(self, axis=0, skipna=True, *args, **kwargs): # ndarray compat argmin = deprecate( 'argmin', idxmin, '0.21.0', - msg=dedent("""\ + msg=dedent(""" The current behaviour of 'Series.argmin' is deprecated, use 'idxmin' instead. The behavior of 'argmin' will be corrected to return the positional @@ -1806,7 +1820,7 @@ def idxmax(self, axis=0, skipna=True, *args, **kwargs): ) argmax = deprecate( 'argmax', idxmax, '0.21.0', - msg=dedent("""\ + msg=dedent(""" The current behaviour of 'Series.argmax' is deprecated, use 'idxmax' instead. The behavior of 'argmax' will be corrected to return the positional @@ -1899,7 +1913,7 @@ def quantile(self, q=0.5, interpolation='linear'): def corr(self, other, method='pearson', min_periods=None): """ - Compute correlation with `other` Series, excluding missing values + Compute correlation with `other` Series, excluding missing values. Parameters ---------- @@ -1942,7 +1956,7 @@ def corr(self, other, method='pearson', min_periods=None): def cov(self, other, min_periods=None): """ - Compute covariance with Series, excluding missing values + Compute covariance with Series, excluding missing values. Parameters ---------- @@ -2149,11 +2163,15 @@ def dot(self, other): raise TypeError('unsupported type: %s' % type(other)) def __matmul__(self, other): - """ Matrix multiplication using binary `@` operator in Python>=3.5 """ + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ return self.dot(other) def __rmatmul__(self, other): - """ Matrix multiplication using binary `@` operator in Python>=3.5 """ + """ + Matrix multiplication using binary `@` operator in Python>=3.5. 
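A hypothetical doctest (Python >= 3.5 only) could round this out; the scalar result below comes from an ordinary dot product:

+
+        Examples
+        --------
+        >>> s = pd.Series([1, 2, 3])
+        >>> [1, 1, 1] @ s
+        6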
+ """ return self.dot(np.transpose(other)) @Substitution(klass='Series') @@ -2250,7 +2268,7 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): def _binop(self, other, func, level=None, fill_value=None): """ - Perform generic binary operation with optional fill value + Perform generic binary operation with optional fill value. Parameters ---------- @@ -2357,7 +2375,7 @@ def combine(self, other, func, fill_value=None): eagle 200.0 falcon 345.0 dtype: float64 -""" + """ if fill_value is None: fill_value = na_value_for_dtype(self.dtype, compat=False) @@ -2439,7 +2457,7 @@ def combine_first(self, other): def update(self, other): """ Modify Series in place using non-NA values from passed - Series. Aligns on index + Series. Aligns on index. Parameters ---------- @@ -2814,7 +2832,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, def argsort(self, axis=0, kind='quicksort', order=None): """ Overrides ndarray.argsort. Argsorts the value, omitting NA/null values, - and places the result in the same locations as the non-NA values + and places the result in the same locations as the non-NA values. Parameters ---------- @@ -3040,7 +3058,7 @@ def nsmallest(self, n=5, keep='first'): def swaplevel(self, i=-2, j=-1, copy=True): """ - Swap levels i and j in a MultiIndex + Swap levels i and j in a MultiIndex. Parameters ---------- @@ -3062,8 +3080,9 @@ def swaplevel(self, i=-2, j=-1, copy=True): def reorder_levels(self, order): """ - Rearrange index levels using input order. May not drop or duplicate - levels + Rearrange index levels using input order. + + May not drop or duplicate levels. Parameters ---------- @@ -3208,8 +3227,7 @@ def map(self, arg, na_action=None): def _gotitem(self, key, ndim, subset=None): """ - sub-classes to define - return a sliced object + Sub-classes to define. Return a sliced object. Parameters ---------- @@ -3290,8 +3308,8 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): """ Invoke function on values of Series. - Can be ufunc (a NumPy function that applies to the entire Series) or a - Python function that only works on single values. + Can be ufunc (a NumPy function that applies to the entire Series) + or a Python function that only works on single values. Parameters ---------- @@ -3423,11 +3441,10 @@ def f(x): def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): """ - perform a reduction operation - - if we have an ndarray as a value, then simply perform the operation, - otherwise delegate to the object + Perform a reduction operation. + If we have an ndarray as a value, then simply perform the operation, + otherwise delegate to the object. """ delegate = self._values @@ -3464,8 +3481,9 @@ def _reindex_indexer(self, new_index, indexer, copy): return self._constructor(new_values, index=new_index) def _needs_reindex_multi(self, axes, method, level): - """ check if we do need a multi reindex; this is for compat with - higher dims + """ + Check if we do need a multi reindex; this is for compat with + higher dims. """ return False @@ -3480,7 +3498,8 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, broadcast_axis=broadcast_axis) def rename(self, index=None, **kwargs): - """Alter Series index labels or name + """ + Alter Series index labels or name. Function / dict values must be unique (1-to-1). Labels not contained in a dict / Series will be left as-is. 
Extra labels listed don't throw an @@ -3664,7 +3683,8 @@ def shift(self, periods=1, freq=None, axis=0): return super(Series, self).shift(periods=periods, freq=freq, axis=axis) def reindex_axis(self, labels, axis=0, **kwargs): - """Conform Series to new index with optional filling logic. + """ + Conform Series to new index with optional filling logic. .. deprecated:: 0.21.0 Use ``Series.reindex`` instead. @@ -3893,7 +3913,8 @@ def between(self, left, right, inclusive=True): @classmethod def from_csv(cls, path, sep=',', parse_dates=True, header=None, index_col=0, encoding=None, infer_datetime_format=False): - """Read CSV file. + """ + Read CSV file. .. deprecated:: 0.21.0 Use :func:`pandas.read_csv` instead. @@ -4123,7 +4144,8 @@ def dropna(self, axis=0, inplace=False, **kwargs): return self.copy() def valid(self, inplace=False, **kwargs): - """Return Series without null values. + """ + Return Series without null values. .. deprecated:: 0.23.0 Use :meth:`Series.dropna` instead. @@ -4137,7 +4159,7 @@ def valid(self, inplace=False, **kwargs): def to_timestamp(self, freq=None, how='start', copy=True): """ - Cast to datetimeindex of timestamps, at *beginning* of period + Cast to datetimeindex of timestamps, at *beginning* of period. Parameters ---------- @@ -4162,7 +4184,7 @@ def to_timestamp(self, freq=None, how='start', copy=True): def to_period(self, freq=None, copy=True): """ Convert Series from DatetimeIndex to PeriodIndex with desired - frequency (inferred from index if not passed) + frequency (inferred from index if not passed). Parameters ---------- @@ -4210,8 +4232,9 @@ def to_period(self, freq=None, copy=True): def _sanitize_index(data, index, copy=False): - """ sanitize an index type to return an ndarray of the underlying, pass - thru a non-Index + """ + Sanitize an index type to return an ndarray of the underlying, pass + through a non-Index. """ if index is None: @@ -4238,8 +4261,9 @@ def _sanitize_index(data, index, copy=False): def _sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False): - """ sanitize input data to an ndarray, copy if specified, coerce to the - dtype if specified + """ + Sanitize input data to an ndarray, copy if specified, coerce to the + dtype if specified. """ if dtype is not None: diff --git a/pandas/core/window.py b/pandas/core/window.py index 4c67b04e89ba83..a079cea0fabd1f 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1,9 +1,6 @@ """ - -provide a generic structure to support window functions, -similar to how we have a Groupby object - - +Provide a generic structure to support window functions, +similar to how we have a Groupby object. """ from __future__ import division @@ -95,14 +92,17 @@ def validate(self): "'neither'") def _convert_freq(self): - """ resample according to the how, return a new object """ - + """ + Resample according to the how, return a new object. + """ obj = self._selected_obj index = None return obj, index def _create_blocks(self): - """ split data into blocks & return conformed data """ + """ + Split data into blocks & return conformed data. + """ obj, index = self._convert_freq() if index is not None: @@ -119,8 +119,7 @@ def _create_blocks(self): def _gotitem(self, key, ndim, subset=None): """ - sub-classes to define - return a sliced object + Sub-classes to define. Return a sliced object. 
        Parameters
        ----------
@@ -161,7 +160,9 @@ def _window_type(self):
         return self.__class__.__name__

     def __unicode__(self):
-        """ provide a nice str repr of our rolling object """
+        """
+        Provide a nice str repr of our rolling object.
+        """

         attrs = ["{k}={v}".format(k=k, v=getattr(self, k))
                  for k in self._attributes
@@ -175,7 +176,7 @@ def __iter__(self):

     def _get_index(self, index=None):
         """
-        Return index as ndarrays
+        Return index as ndarrays.

         Returns
         -------
@@ -219,7 +220,9 @@ def _prep_values(self, values=None, kill_inf=True):
         return values

     def _wrap_result(self, result, block=None, obj=None):
-        """ wrap a single result """
+        """
+        Wrap a single result.
+        """

         if obj is None:
             obj = self._selected_obj
@@ -243,7 +246,7 @@ def _wrap_result(self, result, block=None, obj=None):

     def _wrap_results(self, results, blocks, obj):
         """
-        wrap the results
+        Wrap the results.

         Parameters
         ----------
@@ -288,7 +291,9 @@ def _wrap_results(self, results, blocks, obj):
         return concat(final, axis=1).reindex(columns=columns, copy=False)

     def _center_window(self, result, window):
-        """ center the result in the window """
+        """
+        Center the result in the window.
+        """
         if self.axis > result.ndim - 1:
             raise ValueError("Requested axis is larger then no. of argument "
                              "dimensions")
@@ -606,8 +611,8 @@ def validate(self):

     def _prep_window(self, **kwargs):
         """
-        provide validation for our window type, return the window
-        we have already been validated
+        Provide validation for our window type; return the window
+        we have already validated.
         """

         window = self._get_window()
@@ -757,7 +762,9 @@ def mean(self, *args, **kwargs):

 class _GroupByMixin(GroupByMixin):
-    """ provide the groupby facilities """
+    """
+    Provide the groupby facilities.
+    """

     def __init__(self, obj, *args, **kwargs):
         parent = kwargs.pop('parent', None)  # noqa
@@ -776,8 +783,8 @@ def __init__(self, obj, *args, **kwargs):
     def _apply(self, func, name, window=None, center=None,
                check_minp=None, **kwargs):
         """
-        dispatch to apply; we are stripping all of the _apply kwargs and
-        performing the original function call on the grouped object
+        Dispatch to apply; we are stripping all of the _apply kwargs and
+        performing the original function call on the grouped object.
         """

         def f(x, name=name, *args):
@@ -800,8 +807,9 @@ def _constructor(self):
     def _apply(self, func, name=None, window=None, center=None,
                check_minp=None, **kwargs):
         """
-        Rolling statistical measure using supplied function. Designed to be
-        used with passed-in Cython array-based functions.
+        Rolling statistical measure using supplied function.
+
+        Designed to be used with passed-in Cython array-based functions.

         Parameters
         ----------
@@ -937,7 +945,7 @@ def count(self):
         return self._wrap_results(results, blocks, obj)

     _shared_docs['apply'] = dedent(r"""
-    %(name)s function apply
+    %(name)s function apply.

     Parameters
     ----------
@@ -995,7 +1003,7 @@ def sum(self, *args, **kwargs):
         return self._apply('roll_sum', 'sum', **kwargs)

     _shared_docs['max'] = dedent("""
-    %(name)s maximum
+    Calculate the %(name)s maximum.
     """)

     def max(self, *args, **kwargs):
@@ -1259,7 +1267,7 @@ def kurt(self, **kwargs):
                            check_minp=_require_min_periods(4), **kwargs)

     _shared_docs['quantile'] = dedent("""
-    %(name)s quantile.
+    Calculate the %(name)s quantile.

     Parameters
     ----------
@@ -1333,7 +1341,7 @@ def f(arg, *args, **kwargs):
                            **kwargs)

     _shared_docs['cov'] = dedent("""
-    %(name)s sample covariance
+    Calculate the %(name)s sample covariance.
Parameters ---------- @@ -1485,7 +1493,7 @@ def _get_cov(X, Y): Y 0.626300 1.000000 4 X 1.000000 0.555368 Y 0.555368 1.000000 -""") + """) def corr(self, other=None, pairwise=None, **kwargs): if other is None: @@ -1566,14 +1574,18 @@ def validate(self): "and offset based windows") def _validate_monotonic(self): - """ validate on is monotonic """ + """ + Validate on is_monotonic. + """ if not self._on.is_monotonic: formatted = self.on or 'index' raise ValueError("{0} must be " "monotonic".format(formatted)) def _validate_freq(self): - """ validate & return window frequency """ + """ + Validate & return window frequency. + """ from pandas.tseries.frequencies import to_offset try: return to_offset(self.window) @@ -1760,7 +1772,7 @@ def corr(self, other=None, pairwise=None, **kwargs): class RollingGroupby(_GroupByMixin, Rolling): """ - Provides a rolling groupby implementation + Provides a rolling groupby implementation. .. versionadded:: 0.18.1 @@ -1781,10 +1793,10 @@ def _gotitem(self, key, ndim, subset=None): def _validate_monotonic(self): """ - validate that on is monotonic; + Validate that on is monotonic; we don't care for groupby.rolling because we have already validated at a higher - level + level. """ pass @@ -2031,7 +2043,7 @@ def corr(self, other=None, pairwise=None, **kwargs): class ExpandingGroupby(_GroupByMixin, Expanding): """ - Provides a expanding groupby implementation + Provides a expanding groupby implementation. .. versionadded:: 0.18.1 @@ -2069,7 +2081,7 @@ def _constructor(self): class EWM(_Rolling): r""" - Provides exponential weighted functions + Provides exponential weighted functions. .. versionadded:: 0.18.0 @@ -2219,7 +2231,8 @@ def aggregate(self, arg, *args, **kwargs): agg = aggregate def _apply(self, func, **kwargs): - """Rolling statistical measure using supplied function. Designed to be + """ + Rolling statistical measure using supplied function. Designed to be used with passed-in Cython array-based functions. Parameters @@ -2261,7 +2274,9 @@ def func(arg): @Substitution(name='ewm') @Appender(_doc_template) def mean(self, *args, **kwargs): - """exponential weighted moving average""" + """ + Exponential weighted moving average. + """ nv.validate_window_func('mean', args, kwargs) return self._apply('ewma', **kwargs) @@ -2269,7 +2284,9 @@ def mean(self, *args, **kwargs): @Appender(_doc_template) @Appender(_bias_template) def std(self, bias=False, *args, **kwargs): - """exponential weighted moving stddev""" + """ + Exponential weighted moving stddev. + """ nv.validate_window_func('std', args, kwargs) return _zsqrt(self.var(bias=bias, **kwargs)) @@ -2279,7 +2296,9 @@ def std(self, bias=False, *args, **kwargs): @Appender(_doc_template) @Appender(_bias_template) def var(self, bias=False, *args, **kwargs): - """exponential weighted moving variance""" + """ + Exponential weighted moving variance. + """ nv.validate_window_func('var', args, kwargs) def f(arg): @@ -2293,7 +2312,9 @@ def f(arg): @Appender(_doc_template) @Appender(_pairwise_template) def cov(self, other=None, pairwise=None, bias=False, **kwargs): - """exponential weighted sample covariance""" + """ + Exponential weighted sample covariance. + """ if other is None: other = self._selected_obj # only default unset @@ -2316,7 +2337,9 @@ def _get_cov(X, Y): @Appender(_doc_template) @Appender(_pairwise_template) def corr(self, other=None, pairwise=None, **kwargs): - """exponential weighted sample correlation""" + """ + Exponential weighted sample correlation. 
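A usage sketch, hypothetical and with the numeric output elided since it depends on the smoothing parameters:

+
+        Examples
+        --------
+        >>> s1 = pd.Series([1, 2, 3, 4])
+        >>> s2 = pd.Series([2, 1, 4, 3])
+        >>> s1.ewm(span=3).corr(s2)  # doctest: +SKIP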
+        """
         if other is None:
             other = self._selected_obj
             # only default unset
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
index 2c018598a6a6e1..dda50b6a0e7f8e 100644
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ -53,8 +53,7 @@ def _mpl(func):

 class Styler(object):
     """
-    Helps style a DataFrame or Series according to the
-    data with HTML and CSS.
+    Helps style a DataFrame or Series according to the data with HTML and CSS.

     Parameters
     ----------
@@ -157,7 +156,9 @@ def default_display_func(x):
         self._display_funcs = defaultdict(lambda: default_display_func)

     def _repr_html_(self):
-        """Hooks into Jupyter notebook rich display system."""
+        """
+        Hooks into Jupyter notebook rich display system.
+        """
         return self.render()

     @Appender(_shared_docs['to_excel'] % dict(
@@ -187,7 +188,7 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
     def _translate(self):
         """
         Convert the DataFrame in `self.data` and the attrs from `_build_styles`
-        into a dictionary of {head, body, uuid, cellstyle}
+        into a dictionary of {head, body, uuid, cellstyle}.
         """
         table_styles = self.table_styles or []
         caption = self.caption
@@ -417,7 +418,8 @@ def format(self, formatter, subset=None):
         return self

     def render(self, **kwargs):
-        """Render the built up styles to HTML
+        """
+        Render the built up styles to HTML.

         Parameters
         ----------
@@ -467,8 +469,9 @@ def render(self, **kwargs):

     def _update_ctx(self, attrs):
         """
-        update the state of the Styler. Collects a mapping
-        of {index_label: ['<property>: <value>']}
+        Update the state of the Styler.
+
+        Collects a mapping of {index_label: ['<property>: <value>']}.

         attrs : Series or DataFrame
             should contain strings of '<property>: <value>;<prop2>: <val2>'
@@ -504,7 +507,8 @@ def __deepcopy__(self, memo):
         return self._copy(deepcopy=True)

     def clear(self):
-        """"Reset" the styler, removing any previously applied styles.
+        """
+        Reset the styler, removing any previously applied styles.

         Returns None.
         """
         self.ctx.clear()
@@ -696,9 +700,10 @@ def set_precision(self, precision):

     def set_table_attributes(self, attributes):
         """
-        Set the table attributes. These are the items
-        that show up in the opening ``<table>`` tag in addition
-        to to automatic (by default) id.
+        Set the table attributes.
+
+        These are the items that show up in the opening ``<table>`` tag
+        in addition to the automatic (by default) id.

         Parameters
         ----------
@@ -720,6 +725,7 @@ def set_table_attributes(self, attributes):
     def export(self):
         """
         Export the styles to applied to the current Styler.
+
         Can be applied to a second style with ``Styler.use``.

         Returns
@@ -785,8 +791,9 @@ def set_caption(self, caption):

     def set_table_styles(self, table_styles):
         """
-        Set the table styles on a Styler. These are placed in a
-        ``