diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index 1dc74ad83b7e6..a129b75636536 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -120,6 +120,7 @@ Methods Timestamp.timetuple Timestamp.timetz Timestamp.to_datetime64 + Timestamp.to_numpy Timestamp.to_julian_date Timestamp.to_period Timestamp.to_pydatetime @@ -191,6 +192,7 @@ Methods Timedelta.round Timedelta.to_pytimedelta Timedelta.to_timedelta64 + Timedelta.to_numpy Timedelta.total_seconds A collection of timedeltas may be stored in a :class:`TimedeltaArray`. diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 2c2e5c5425216..e4dd82afcdf65 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1317,7 +1317,7 @@ arbitrary function, for example: df.groupby(['Store', 'Product']).pipe(mean) where ``mean`` takes a GroupBy object and finds the mean of the Revenue and Quantity -columns repectively for each Store-Product combination. The ``mean`` function can +columns respectively for each Store-Product combination. The ``mean`` function can be any function that takes in a GroupBy object; the ``.pipe`` will pass the GroupBy object as a parameter into the function you specify. diff --git a/doc/source/whatsnew/v0.10.0.rst b/doc/source/whatsnew/v0.10.0.rst index bc2a4918bc27b..2d6550bb6888d 100644 --- a/doc/source/whatsnew/v0.10.0.rst +++ b/doc/source/whatsnew/v0.10.0.rst @@ -370,7 +370,7 @@ Updated PyTables Support df1.get_dtype_counts() - performance improvements on table writing -- support for arbitrarly indexed dimensions +- support for arbitrarily indexed dimensions - ``SparseSeries`` now has a ``density`` property (:issue:`2384`) - enable ``Series.str.strip/lstrip/rstrip`` methods to take an input argument to strip arbitrary characters (:issue:`2411`) diff --git a/doc/source/whatsnew/v0.16.1.rst b/doc/source/whatsnew/v0.16.1.rst index 7621cb9c1e27c..cbcb23e356577 100644 --- a/doc/source/whatsnew/v0.16.1.rst +++ b/doc/source/whatsnew/v0.16.1.rst @@ -136,7 +136,7 @@ groupby operations on the index will preserve the index nature as well reindexing operations, will return a resulting index based on the type of the passed indexer, meaning that passing a list will return a plain-old-``Index``; indexing with a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories -of the PASSED ``Categorical`` dtype. This allows one to arbitrarly index these even with +of the PASSED ``Categorical`` dtype. This allows one to arbitrarily index these even with values NOT in the categories, similarly to how you can reindex ANY pandas index. .. code-block:: ipython diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index f17c4974cd450..8e59c2300e7ca 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -25,6 +25,7 @@ Fixed Regressions - Fixed regression in :meth:`DataFrame.apply` causing ``RecursionError`` when ``dict``-like classes were passed as argument. (:issue:`25196`) - Fixed regression in :meth:`DataFrame.duplicated()`, where empty dataframe was not returning a boolean dtyped Series. (:issue:`25184`) +- Fixed regression in :meth:`Series.min` and :meth:`Series.max` where ``numeric_only=True`` was ignored when the ``Series`` contained ``Categorical`` data (:issue:`25299`) ..
_whatsnew_0242.enhancements: @@ -53,6 +54,7 @@ Bug Fixes **I/O** +- Better handling of terminal printing when the terminal dimensions are not known (:issue:`25080`) - Bug in reading a HDF5 table-format ``DataFrame`` created in Python 2, in Python 3 (:issue:`24925`) - Bug in reading a JSON with ``orient='table'`` generated by :meth:`DataFrame.to_json` with ``index=False`` (:issue:`25170`) - Bug where float indexes could have misaligned values when printing (:issue:`25061`) @@ -78,7 +80,7 @@ Bug Fixes **Reshaping** -- +- Bug in :meth:`pandas.core.groupby.GroupBy.transform` where applying a function to a timezone aware column would return a timezone naive result (:issue:`24198`) - Bug in :func:`DataFrame.join` when joining on a timezone aware :class:`DatetimeIndex` (:issue:`23931`) - diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 965fa1a0a0906..078e54433fa43 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -20,8 +20,8 @@ Other Enhancements ^^^^^^^^^^^^^^^^^^ - :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`) -- :meth:`DataFrame.pivot` now supports multiple column indexes (:issue:`21425`) -- +- :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :class:`datetime.time` objects with timezones (:issue:`24043`) +- :meth:`DataFrame.pivot` now supports multiple column indexes by accepting a list of columns (:issue:`21425`) .. _whatsnew_0250.api_breaking: @@ -34,6 +34,7 @@ Other API Changes ^^^^^^^^^^^^^^^^^ - :class:`DatetimeTZDtype` will now standardize pytz timezones to a common timezone instance (:issue:`24713`) +- ``Timestamp`` and ``Timedelta`` scalars now implement the :meth:`to_numpy` method as aliases to :meth:`Timestamp.to_datetime64` and :meth:`Timedelta.to_timedelta64`, respectively (:issue:`24653`) - - @@ -172,7 +173,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Bug in :meth:`pandas.core.resample.Resampler.agg` with a timezone aware index where an ``OverflowError`` would be raised when passing a list of functions (:issue:`22660`) - - diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index b64c3479f23fe..a13fcfdc855d5 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -188,6 +188,26 @@ cdef class _NaT(datetime): """ return np.datetime64('NaT', 'ns') + def to_numpy(self, dtype=None, copy=False): + """ + Convert the Timestamp to a NumPy datetime64. + + .. versionadded:: 0.25.0 + + This is an alias method for `Timestamp.to_datetime64()`. The dtype and + copy parameters are available here only for compatibility. Their values + will not affect the return value. + + Returns + ------- + numpy.datetime64 + + See Also + -------- + DatetimeIndex.to_numpy : Similar method for DatetimeIndex. + """ + return self.to_datetime64() + def __repr__(self): return 'NaT' diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 58b2faac8b06b..6e40063fb925a 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -824,6 +824,26 @@ cdef class _Timedelta(timedelta): """ Returns a numpy.timedelta64 object with 'ns' precision """ return np.timedelta64(self.value, 'ns') + def to_numpy(self, dtype=None, copy=False): + """ + Convert the Timedelta to a NumPy timedelta64. + + .. versionadded:: 0.25.0 + + This is an alias method for `Timedelta.to_timedelta64()`. 
The dtype and + copy parameters are available here only for compatibility. Their values + will not affect the return value. + + Returns + ------- + numpy.timedelta64 + + See Also + -------- + Series.to_numpy : Similar method for Series. + """ + return self.to_timedelta64() + def total_seconds(self): """ Total duration of timedelta in seconds (to ns precision) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 8a95d2494dfa4..a2929dbeb471f 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -345,6 +345,26 @@ cdef class _Timestamp(datetime): """ return np.datetime64(self.value, 'ns') + def to_numpy(self, dtype=None, copy=False): + """ + Convert the Timestamp to a NumPy datetime64. + + .. versionadded:: 0.25.0 + + This is an alias method for `Timestamp.to_datetime64()`. The dtype and + copy parameters are available here only for compatibility. Their values + will not affect the return value. + + Returns + ------- + numpy.datetime64 + + See Also + -------- + DatetimeIndex.to_numpy : Similar method for DatetimeIndex. + """ + return self.to_datetime64() + def __add__(self, other): cdef: int64_t other_int, nanos diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d7d0882bbcc94..79e565df94eae 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -214,7 +214,7 @@ def contains(cat, key, container): class Categorical(ExtensionArray, PandasObject): """ - Represent a categorical variable in classic R / S-plus fashion + Represent a categorical variable in classic R / S-plus fashion. `Categoricals` can only take on only a limited, and usually fixed, number of possible values (`categories`). In contrast to statistical categorical @@ -235,7 +235,7 @@ class Categorical(ExtensionArray, PandasObject): The unique categories for this categorical. If not given, the categories are assumed to be the unique values of `values` (sorted, if possible, otherwise in the order in which they appear). - ordered : boolean, (default False) + ordered : bool, default False Whether or not this categorical is treated as a ordered categorical. If True, the resulting categorical will be ordered. An ordered categorical respects, when sorted, the order of its @@ -253,7 +253,7 @@ class Categorical(ExtensionArray, PandasObject): codes : ndarray The codes (integer positions, which point to the categories) of this categorical, read only. - ordered : boolean + ordered : bool Whether or not this Categorical is ordered. dtype : CategoricalDtype The instance of ``CategoricalDtype`` storing the ``categories`` @@ -297,7 +297,7 @@ class Categorical(ExtensionArray, PandasObject): Ordered `Categoricals` can be sorted according to the custom order of the categories and can have a min and max value. - >>> c = pd.Categorical(['a','b','c','a','b','c'], ordered=True, + >>> c = pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'], ordered=True, ... categories=['c', 'b', 'a']) >>> c [a, b, c, a, b, c] @@ -618,7 +618,7 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): ---------- codes : array-like, integers An integer array, where each integer points to a category in - categories or dtype.categories, or else is -1 for NaN + categories or dtype.categories, or else is -1 for NaN. categories : index-like, optional The categories for the categorical. Items need to be unique. 
If the categories are not given here, then they must be provided @@ -700,7 +700,7 @@ def _set_categories(self, categories, fastpath=False): Parameters ---------- - fastpath : boolean (default: False) + fastpath : bool, default False Don't perform validation of the categories for uniqueness or nulls Examples @@ -747,15 +747,15 @@ def _set_dtype(self, dtype): def set_ordered(self, value, inplace=False): """ - Set the ordered attribute to the boolean value + Set the ordered attribute to the boolean value. Parameters ---------- - value : boolean to set whether this categorical is ordered (True) or - not (False) - inplace : boolean (default: False) - Whether or not to set the ordered attribute inplace or return a copy - of this categorical with ordered set to the value + value : bool + Set whether this categorical is ordered (True) or not (False). + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to the value. """ inplace = validate_bool_kwarg(inplace, 'inplace') new_dtype = CategoricalDtype(self.categories, ordered=value) @@ -770,9 +770,9 @@ def as_ordered(self, inplace=False): Parameters ---------- - inplace : boolean (default: False) - Whether or not to set the ordered attribute inplace or return a copy - of this categorical with ordered set to True + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to True. """ inplace = validate_bool_kwarg(inplace, 'inplace') return self.set_ordered(True, inplace=inplace) @@ -783,9 +783,9 @@ def as_unordered(self, inplace=False): Parameters ---------- - inplace : boolean (default: False) - Whether or not to set the ordered attribute inplace or return a copy - of this categorical with ordered set to False + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to False. """ inplace = validate_bool_kwarg(inplace, 'inplace') return self.set_ordered(False, inplace=inplace) @@ -815,19 +815,19 @@ def set_categories(self, new_categories, ordered=None, rename=False, ---------- new_categories : Index-like The categories in new order. - ordered : boolean, (default: False) + ordered : bool, default False Whether or not the categorical is treated as a ordered categorical. If not given, do not change the ordered information. - rename : boolean (default: False) + rename : bool, default False Whether or not the new_categories should be considered as a rename of the old categories or as reordered categories. - inplace : boolean (default: False) - Whether or not to reorder the categories inplace or return a copy of - this categorical with reordered categories. + inplace : bool, default False + Whether or not to reorder the categories in-place or return a copy + of this categorical with reordered categories. Returns ------- - cat : Categorical with reordered categories or None if inplace. + Categorical with reordered categories or None if inplace. Raises ------ @@ -890,7 +890,7 @@ def rename_categories(self, new_categories, inplace=False): Currently, Series are considered list like. In a future version of pandas they'll be considered dict-like. - inplace : boolean (default: False) + inplace : bool, default False Whether or not to rename the categories inplace or return a copy of this categorical with renamed categories. 
@@ -967,10 +967,10 @@ def reorder_categories(self, new_categories, ordered=None, inplace=False): ---------- new_categories : Index-like The categories in new order. - ordered : boolean, optional + ordered : bool, optional Whether or not the categorical is treated as a ordered categorical. If not given, do not change the ordered information. - inplace : boolean (default: False) + inplace : bool, default False Whether or not to reorder the categories inplace or return a copy of this categorical with reordered categories. @@ -1010,7 +1010,7 @@ def add_categories(self, new_categories, inplace=False): ---------- new_categories : category or list-like of category The new categories to be included. - inplace : boolean (default: False) + inplace : bool, default False Whether or not to add the categories inplace or return a copy of this categorical with added categories. @@ -1060,7 +1060,7 @@ def remove_categories(self, removals, inplace=False): ---------- removals : category or list of categories The categories which should be removed. - inplace : boolean (default: False) + inplace : bool, default False Whether or not to remove the categories inplace or return a copy of this categorical with removed categories. @@ -1108,7 +1108,7 @@ def remove_unused_categories(self, inplace=False): Parameters ---------- - inplace : boolean (default: False) + inplace : bool, default False Whether or not to drop unused categories inplace or return a copy of this categorical with unused categories dropped. @@ -1460,7 +1460,7 @@ def value_counts(self, dropna=True): Parameters ---------- - dropna : boolean, default True + dropna : bool, default True Don't include counts of NaN. Returns @@ -1581,9 +1581,9 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'): Parameters ---------- - inplace : boolean, default False + inplace : bool, default False Do operation in place. - ascending : boolean, default True + ascending : bool, default True Order ascending. Passing False orders descending. The ordering parameter provides the method by which the category values are organized. @@ -2172,7 +2172,7 @@ def _reverse_indexer(self): return result # reduction ops # - def _reduce(self, name, axis=0, skipna=True, **kwargs): + def _reduce(self, name, axis=0, **kwargs): func = getattr(self, name, None) if func is None: msg = 'Categorical cannot perform the operation {op}' @@ -2239,7 +2239,7 @@ def mode(self, dropna=True): Parameters ---------- - dropna : boolean, default True + dropna : bool, default True Don't consider counts of NaN/NaT. .. versionadded:: 0.24.0 @@ -2332,7 +2332,7 @@ def equals(self, other): Returns ------- - are_equal : boolean + bool """ if self.is_dtype_equal(other): if self.categories.equals(other.categories): @@ -2356,7 +2356,7 @@ def is_dtype_equal(self, other): Returns ------- - are_equal : boolean + bool """ try: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1b2a4da389dc4..cd8e8ed520ddc 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -799,14 +799,14 @@ def tz_convert(self, tz): Parameters ---------- - tz : string, pytz.timezone, dateutil.tz.tzfile or None + tz : str, pytz.timezone, dateutil.tz.tzfile or None Time zone for time. Corresponding timestamps would be converted to this time zone of the Datetime Array/Index. A `tz` of None will convert to UTC and remove the timezone information. 
Returns ------- - normalized : same type as self + Array or Index Raises ------ @@ -842,7 +842,7 @@ def tz_convert(self, tz): With the ``tz=None``, we can remove the timezone (after converting to UTC if necessary): - >>> dti = pd.date_range(start='2014-08-01 09:00',freq='H', + >>> dti = pd.date_range(start='2014-08-01 09:00', freq='H', ... periods=3, tz='Europe/Berlin') >>> dti @@ -882,7 +882,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', Parameters ---------- - tz : string, pytz.timezone, dateutil.tz.tzfile or None + tz : str, pytz.timezone, dateutil.tz.tzfile or None Time zone to convert timestamps to. Passing ``None`` will remove the time zone information preserving local time. ambiguous : 'infer', 'NaT', bool array, default 'raise' @@ -930,7 +930,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', Returns ------- - result : same type as self + Same type as self Array/Index converted to the specified time zone. Raises ------ @@ -970,43 +970,39 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', Be careful with DST changes. When there is sequential data, pandas can infer the DST time: - >>> s = pd.to_datetime(pd.Series([ - ... '2018-10-28 01:30:00', - ... '2018-10-28 02:00:00', - ... '2018-10-28 02:30:00', - ... '2018-10-28 02:00:00', - ... '2018-10-28 02:30:00', - ... '2018-10-28 03:00:00', - ... '2018-10-28 03:30:00'])) + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... '2018-10-28 03:30:00'])) >>> s.dt.tz_localize('CET', ambiguous='infer') - 2018-10-28 01:30:00+02:00 0 - 2018-10-28 02:00:00+02:00 1 - 2018-10-28 02:30:00+02:00 2 - 2018-10-28 02:00:00+01:00 3 - 2018-10-28 02:30:00+01:00 4 - 2018-10-28 03:00:00+01:00 5 - 2018-10-28 03:30:00+01:00 6 - dtype: int64 + 0 2018-10-28 01:30:00+02:00 + 1 2018-10-28 02:00:00+02:00 + 2 2018-10-28 02:30:00+02:00 + 3 2018-10-28 02:00:00+01:00 + 4 2018-10-28 02:30:00+01:00 + 5 2018-10-28 03:00:00+01:00 + 6 2018-10-28 03:30:00+01:00 + dtype: datetime64[ns, CET] In some cases, inferring the DST is impossible. In such cases, you can pass an ndarray to the ambiguous parameter to set the DST explicitly - >>> s = pd.to_datetime(pd.Series([ - ... '2018-10-28 01:20:00', - ... '2018-10-28 02:36:00', - ... '2018-10-28 03:46:00'])) + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) 0 2018-10-28 01:20:00+02:00 1 2018-10-28 02:36:00+02:00 2 2018-10-28 03:46:00+01:00 dtype: datetime64[ns, CET] If the DST transition causes nonexistent times, you can shift these dates forward or backwards with a timedelta object or `'shift_forward'` or `'shift_backwards'`. - >>> s = pd.to_datetime(pd.Series([ - ... '2015-03-29 02:30:00', - ... '2015-03-29 03:30:00'])) + >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00', + ... '2015-03-29 03:30:00'])) >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') 0 2015-03-29 03:00:00+02:00 1 2015-03-29 03:30:00+02:00 @@ -1129,7 +1125,7 @@ def to_period(self, freq=None): Parameters ---------- - freq : string or Offset, optional + freq : str or Offset, optional One of pandas' :ref:`offset strings <timeseries.offset_aliases>` or an Offset object. Will be inferred by default. 
@@ -1150,7 +1146,7 @@ def to_period(self, freq=None): Examples -------- - >>> df = pd.DataFrame({"y": [1,2,3]}, + >>> df = pd.DataFrame({"y": [1, 2, 3]}, ... index=pd.to_datetime(["2000-03-31 00:00:00", ... "2000-05-31 00:00:00", ... "2000-08-31 00:00:00"])) diff --git a/pandas/core/base.py b/pandas/core/base.py index 5a98e83c65884..7fdc64a8d9f85 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -794,7 +794,7 @@ def array(self): Returns ------- - array : ExtensionArray + ExtensionArray An ExtensionArray of the values stored within. For extension types, this is the actual array. For NumPy native types, this is a thin (no copy) wrapper around :class:`numpy.ndarray`. @@ -1022,7 +1022,7 @@ def max(self, axis=None, skipna=True): def argmax(self, axis=None, skipna=True): """ - Return a ndarray of the maximum argument indexer. + Return an ndarray of the maximum argument indexer. Parameters ---------- @@ -1087,6 +1087,10 @@ def argmin(self, axis=None, skipna=True): Dummy argument for consistency with Series skipna : bool, default True + Returns + ------- + numpy.ndarray + See Also -------- numpy.ndarray.argmin @@ -1102,6 +1106,10 @@ def tolist(self): (for str, int, float) or a pandas scalar (for Timestamp/Timedelta/Interval/Period) + Returns + ------- + list + See Also -------- numpy.ndarray.tolist @@ -1162,7 +1170,7 @@ def _map_values(self, mapper, na_action=None): Returns ------- - applied : Union[Index, MultiIndex], inferred + Union[Index, MultiIndex], inferred The output of the mapping function applied to the index. If the function returns a tuple with more than one element a MultiIndex will be returned. @@ -1246,7 +1254,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, Returns ------- - counts : Series + Series See Also -------- @@ -1363,7 +1371,7 @@ def is_unique(self): Returns ------- - is_unique : boolean + bool """ return self.nunique(dropna=False) == len(self) @@ -1377,7 +1385,7 @@ def is_monotonic(self): Returns ------- - is_monotonic : boolean + bool """ from pandas import Index return Index(self).is_monotonic @@ -1394,7 +1402,7 @@ def is_monotonic_decreasing(self): Returns ------- - is_monotonic_decreasing : boolean + bool """ from pandas import Index return Index(self).is_monotonic_decreasing diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cf97c94f6d129..a239ff4b4d5db 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6346,6 +6346,8 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, Returns ------- Series or DataFrame + Result of applying ``func`` along the given axis of the + DataFrame. See Also -------- @@ -6364,7 +6366,7 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, Examples -------- - >>> df = pd.DataFrame([[4, 9],] * 3, columns=['A', 'B']) + >>> df = pd.DataFrame([[4, 9]] * 3, columns=['A', 'B']) >>> df A B 0 4 9 diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e2308836d982a..3a73861086bed 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5959,17 +5959,18 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, value : scalar, dict, Series, or DataFrame Value to use to fill holes (e.g. 0), alternately a dict/Series/DataFrame of values specifying which value to use for - each index (for a Series) or column (for a DataFrame). (values not - in the dict/Series/DataFrame will not be filled). This value cannot + each index (for a Series) or column (for a DataFrame). 
Values not + in the dict/Series/DataFrame will not be filled. This value cannot be a list. method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None Method to use for filling holes in reindexed Series pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap + backfill / bfill: use next valid observation to fill gap. axis : %(axes_single_arg)s - inplace : boolean, default False - If True, fill in place. Note: this will modify any - other views on this object, (e.g. a no-copy slice for a column in a + Axis along which to fill missing values. + inplace : bool, default False + If True, fill in-place. Note: this will modify any + other views on this object (e.g., a no-copy slice for a column in a DataFrame). limit : int, default None If method is specified, this is the maximum number of consecutive @@ -5979,18 +5980,20 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, maximum number of entries along the entire axis where NaNs will be filled. Must be greater than 0 if not None. downcast : dict, default is None - a dict of item->dtype of what to downcast if possible, + A dict of item->dtype of what to downcast if possible, or the string 'infer' which will try to downcast to an appropriate - equal type (e.g. float64 to int64 if possible) + equal type (e.g. float64 to int64 if possible). Returns ------- - filled : %(klass)s + %(klass)s + Object with missing values filled. See Also -------- interpolate : Fill NaN values using interpolation. - reindex, asfreq + reindex : Conform object to new index. + asfreq : Convert TimeSeries to specified frequency. Examples -------- @@ -5998,7 +6001,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, ... [3, 4, np.nan, 1], ... [np.nan, np.nan, np.nan, 5], ... [np.nan, 3, np.nan, 4]], - ... columns=list('ABCD')) + ... columns=list('ABCD')) >>> df A B C D 0 NaN 2.0 NaN 0 @@ -6752,7 +6755,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Note how the first entry in column 'b' remains ``NaN``, because there is no entry befofe it to use for interpolation. - >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), + >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), ... (np.nan, 2.0, np.nan, np.nan), ... (2.0, 3.0, np.nan, 9.0), ... (np.nan, 4.0, -4.0, 16.0)], @@ -7221,9 +7224,9 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, upper : float or array_like, default None Maximum threshold value. All values above this threshold will be set to it. - axis : int or string axis name, optional + axis : int or str axis name, optional Align object with lower and upper along the given axis. - inplace : boolean, default False + inplace : bool, default False Whether to perform the operation in place on the data. .. versionadded:: 0.21.0 @@ -7345,7 +7348,7 @@ def clip_upper(self, threshold, axis=None, inplace=False): axis : {0 or 'index', 1 or 'columns'}, default 0 Align object with `threshold` along the given axis. - inplace : boolean, default False + inplace : bool, default False Whether to perform the operation in place on the data. .. versionadded:: 0.21.0 @@ -7426,7 +7429,7 @@ def clip_lower(self, threshold, axis=None, inplace=False): axis : {0 or 'index', 1 or 'columns'}, default 0 Align `self` with `threshold` along the given axis. - inplace : boolean, default False + inplace : bool, default False Whether to perform the operation in place on the data. .. 
versionadded:: 0.21.0 @@ -7583,9 +7586,9 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, Examples -------- - >>> df = pd.DataFrame({'Animal' : ['Falcon', 'Falcon', - ... 'Parrot', 'Parrot'], - ... 'Max Speed' : [380., 370., 24., 26.]}) + >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon', + ... 'Parrot', 'Parrot'], + ... 'Max Speed': [380., 370., 24., 26.]}) >>> df Animal Max Speed 0 Falcon 380.0 @@ -7604,16 +7607,16 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, using the `level` parameter: >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'], - ... ['Capitve', 'Wild', 'Capitve', 'Wild']] + ... ['Captive', 'Wild', 'Captive', 'Wild']] >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type')) - >>> df = pd.DataFrame({'Max Speed' : [390., 350., 30., 20.]}, - ... index=index) + >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]}, + ... index=index) >>> df Max Speed Animal Type - Falcon Capitve 390.0 + Falcon Captive 390.0 Wild 350.0 - Parrot Capitve 30.0 + Parrot Captive 30.0 Wild 20.0 >>> df.groupby(level=0).mean() Max Speed @@ -7623,7 +7626,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, >>> df.groupby(level=1).mean() Max Speed Type - Capitve 210.0 + Captive 210.0 Wild 185.0 """ from pandas.core.groupby.groupby import groupby @@ -7740,14 +7743,14 @@ def at_time(self, time, asof=False, axis=None): Parameters ---------- - time : datetime.time or string + time : datetime.time or str axis : {0 or 'index', 1 or 'columns'}, default 0 .. versionadded:: 0.24.0 Returns ------- - values_at_time : same type as caller + Series or DataFrame Raises ------ @@ -7765,7 +7768,7 @@ def at_time(self, time, asof=False, axis=None): Examples -------- >>> i = pd.date_range('2018-04-09', periods=4, freq='12H') - >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) >>> ts A 2018-04-09 00:00:00 1 @@ -7800,17 +7803,17 @@ def between_time(self, start_time, end_time, include_start=True, Parameters ---------- - start_time : datetime.time or string - end_time : datetime.time or string - include_start : boolean, default True - include_end : boolean, default True + start_time : datetime.time or str + end_time : datetime.time or str + include_start : bool, default True + include_end : bool, default True axis : {0 or 'index', 1 or 'columns'}, default 0 .. 
versionadded:: 0.24.0 Returns ------- - values_between_time : same type as caller + Series or DataFrame Raises ------ @@ -7828,7 +7831,7 @@ def between_time(self, start_time, end_time, include_start=True, Examples -------- >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min') - >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) >>> ts A 2018-04-09 00:00:00 1 diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 27e13e86a6e9e..52056a6842ed9 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -964,7 +964,7 @@ def _transform_fast(self, func, func_nm): ids, _, ngroup = self.grouper.group_info cast = self._transform_should_cast(func_nm) - out = algorithms.take_1d(func().values, ids) + out = algorithms.take_1d(func()._values, ids) if cast: out = self._try_cast(out, self.obj) return Series(out, index=self.obj.index, name=self.obj.name) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index df91c71cfe238..1037e2d9a3bd6 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1010,7 +1010,7 @@ def get_loc(self, key, method=None, tolerance=None): except (KeyError, ValueError, TypeError): try: return self._get_string_slice(key) - except (TypeError, KeyError, ValueError): + except (TypeError, KeyError, ValueError, OverflowError): pass try: @@ -1302,20 +1302,19 @@ def indexer_at_time(self, time, asof=False): -------- indexer_between_time, DataFrame.at_time """ - from dateutil.parser import parse - if asof: raise NotImplementedError("'asof' argument is not supported") if isinstance(time, compat.string_types): + from dateutil.parser import parse time = parse(time).time() if time.tzinfo: - # TODO - raise NotImplementedError("argument 'time' with timezone info is " - "not supported") - - time_micros = self._get_time_micros() + if self.tz is None: + raise ValueError("Index must be timezone aware.") + time_micros = self.tz_convert(time.tzinfo)._get_time_micros() + else: + time_micros = self._get_time_micros() micros = _time_to_micros(time) return (micros == time_micros).nonzero()[0] diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 53671e00e88b4..a6c945ac2e464 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -38,15 +38,15 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, If a dict is passed, the sorted keys will be used as the `keys` argument, unless it is passed, in which case the values will be selected (see below). Any None objects will be dropped silently unless - they are all None in which case a ValueError will be raised + they are all None in which case a ValueError will be raised. axis : {0/'index', 1/'columns'}, default 0 - The axis to concatenate along + The axis to concatenate along. join : {'inner', 'outer'}, default 'outer' - How to handle indexes on other axis(es) + How to handle indexes on other axis (or axes). join_axes : list of Index objects Specific indexes to use for the other n - 1 axes instead of performing - inner/outer set logic - ignore_index : boolean, default False + inner/outer set logic. + ignore_index : bool, default False If True, do not use the index values along the concatenation axis. The resulting axis will be labeled 0, ..., n - 1. 
This is useful if you are concatenating objects where the concatenation axis does not have @@ -54,16 +54,16 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, axes are still respected in the join. keys : sequence, default None If multiple levels passed, should contain tuples. Construct - hierarchical index using the passed keys as the outermost level + hierarchical index using the passed keys as the outermost level. levels : list of sequences, default None Specific levels (unique values) to use for constructing a - MultiIndex. Otherwise they will be inferred from the keys + MultiIndex. Otherwise they will be inferred from the keys. names : list, default None - Names for the levels in the resulting hierarchical index - verify_integrity : boolean, default False + Names for the levels in the resulting hierarchical index. + verify_integrity : bool, default False Check whether the new concatenated axis contains duplicates. This can - be very expensive relative to the actual data concatenation - sort : boolean, default None + be very expensive relative to the actual data concatenation. + sort : bool, default None Sort non-concatenation axis if it is not already aligned when `join` is 'outer'. The current default of sorting is deprecated and will change to not-sorting in a future version of pandas. @@ -76,12 +76,12 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, .. versionadded:: 0.23.0 - copy : boolean, default True - If False, do not copy data unnecessarily + copy : bool, default True + If False, do not copy data unnecessarily. Returns ------- - concatenated : object, type of objs + object, type of objs When concatenating all ``Series`` along the index (axis=0), a ``Series`` is returned. When ``objs`` contains at least one ``DataFrame``, a ``DataFrame`` is returned. When concatenating along @@ -89,10 +89,10 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, See Also -------- - Series.append - DataFrame.append - DataFrame.join - DataFrame.merge + Series.append : Concatenate Series. + DataFrame.append : Concatenate DataFrames. + DataFrame.join : Join DataFrames using indexes. + DataFrame.merge : Merge DataFrames by indexes or columns. Notes ----- @@ -128,7 +128,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, Add a hierarchical index at the outermost level of the data with the ``keys`` option. - >>> pd.concat([s1, s2], keys=['s1', 's2',]) + >>> pd.concat([s1, s2], keys=['s1', 's2']) s1 0 a 1 b s2 0 c diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 5139ebbf85a4a..d2a8c4d56bd6c 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -405,36 +405,36 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, aggfunc=None, margins=False, margins_name='All', dropna=True, normalize=False): """ - Compute a simple cross-tabulation of two (or more) factors. By default + Compute a simple cross tabulation of two (or more) factors. By default computes a frequency table of the factors unless an array of values and an - aggregation function are passed + aggregation function are passed. Parameters ---------- index : array-like, Series, or list of arrays/Series - Values to group by in the rows + Values to group by in the rows. columns : array-like, Series, or list of arrays/Series - Values to group by in the columns + Values to group by in the columns. 
values : array-like, optional Array of values to aggregate according to the factors. Requires `aggfunc` be specified. rownames : sequence, default None - If passed, must match number of row arrays passed + If passed, must match number of row arrays passed. colnames : sequence, default None - If passed, must match number of column arrays passed + If passed, must match number of column arrays passed. aggfunc : function, optional - If specified, requires `values` be specified as well - margins : boolean, default False - Add row/column margins (subtotals) - margins_name : string, default 'All' - Name of the row / column that will contain the totals + If specified, requires `values` be specified as well. + margins : bool, default False + Add row/column margins (subtotals). + margins_name : str, default 'All' + Name of the row/column that will contain the totals when margins is True. .. versionadded:: 0.21.0 - dropna : boolean, default True - Do not include columns whose entries are all NaN - normalize : boolean, {'all', 'index', 'columns'}, or {0,1}, default False + dropna : bool, default True + Do not include columns whose entries are all NaN. + normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False Normalize by dividing all values by the sum of values. - If passed 'all' or `True`, will normalize over all values. @@ -446,7 +446,13 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, Returns ------- - crosstab : DataFrame + DataFrame + Cross tabulation of the data. + + See Also + -------- + DataFrame.pivot : Reshape data based on column values. + pivot_table : Create a pivot table as a DataFrame. Notes ----- @@ -468,32 +474,26 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, ... "one", "two", "two", "two", "one"], dtype=object) >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny", ... "shiny", "dull", "shiny", "shiny", "shiny"], - ... dtype=object) - + ... dtype=object) >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) - ... # doctest: +NORMALIZE_WHITESPACE b one two c dull shiny dull shiny a bar 1 2 1 0 foo 2 2 1 2 + Here 'c' and 'f' are not represented in the data and will not be + shown in the output because dropna is True by default. Set + dropna=False to preserve categories with no data. + >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) - >>> crosstab(foo, bar) # 'c' and 'f' are not represented in the data, - # and will not be shown in the output because - # dropna is True by default. Set 'dropna=False' - # to preserve categories with no data - ... # doctest: +SKIP + >>> pd.crosstab(foo, bar) col_0 d e row_0 a 1 0 b 0 1 - - >>> crosstab(foo, bar, dropna=False) # 'c' and 'f' are not represented - # in the data, but they still will be counted - # and shown in the output - ... # doctest: +SKIP + >>> pd.crosstab(foo, bar, dropna=False) col_0 d e f row_0 a 1 0 0 diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index f436b3b92a359..6ba33301753d6 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -701,19 +701,20 @@ def _convert_level_number(level_num, columns): def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, columns=None, sparse=False, drop_first=False, dtype=None): """ - Convert categorical variable into dummy/indicator variables + Convert categorical variable into dummy/indicator variables. 
Parameters ---------- data : array-like, Series, or DataFrame - prefix : string, list of strings, or dict of strings, default None + Data of which to get dummy indicators. + prefix : str, list of str, or dict of str, default None String to append DataFrame column names. Pass a list with length equal to the number of columns when calling get_dummies on a DataFrame. Alternatively, `prefix` can be a dictionary mapping column names to prefixes. - prefix_sep : string, default '_' + prefix_sep : str, default '_' If appending prefix, separator/delimiter to use. Or pass a - list or dictionary as with `prefix.` + list or dictionary as with `prefix`. dummy_na : bool, default False Add a column to indicate NaNs, if False NaNs are ignored. columns : list-like, default None @@ -736,11 +737,12 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, Returns ------- - dummies : DataFrame + DataFrame + Dummy-coded data. See Also -------- - Series.str.get_dummies + Series.str.get_dummies : Convert Series to dummy codes. Examples -------- diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 2a654fec36a9f..f99fd9004bb31 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -163,7 +163,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, Use `drop` optional when bins is not unique >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True, - ... right=False, duplicates='drop') + ... right=False, duplicates='drop') ... # doctest: +ELLIPSIS (a 0.0 b 1.0 diff --git a/pandas/core/series.py b/pandas/core/series.py index 31c6247436418..a5dfe8d43c336 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3678,8 +3678,12 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, if axis is not None: self._get_axis_number(axis) - # dispatch to ExtensionArray interface - if isinstance(delegate, ExtensionArray): + if isinstance(delegate, Categorical): + # TODO deprecate numeric_only argument for Categorical and use + # skipna as well, see GH25303 + return delegate._reduce(name, numeric_only=numeric_only, **kwds) + elif isinstance(delegate, ExtensionArray): + # dispatch to ExtensionArray interface return delegate._reduce(name, skipna=skipna, **kwds) elif is_datetime64_dtype(delegate): # use DatetimeIndex implementation to handle skipna correctly diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 183a91c952140..cc7a4db515c42 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -120,7 +120,7 @@ def str_count(arr, pat, flags=0): Returns ------- - counts : Series or Index + Series or Index Same type as the calling object containing the integer counts. See Also @@ -283,7 +283,7 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): return `True`. However, '.0' as a regex matches any character followed by a 0. - >>> s2 = pd.Series(['40','40.0','41','41.0','35']) + >>> s2 = pd.Series(['40', '40.0', '41', '41.0', '35']) >>> s2.str.contains('.0', regex=True) 0 True 1 True @@ -433,13 +433,13 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): Parameters ---------- - pat : string or compiled regex + pat : str or compiled regex String can be a character sequence or regular expression. .. versionadded:: 0.20.0 `pat` also accepts a compiled regex. - repl : string or callable + repl : str or callable Replacement string or a callable. The callable is passed the regex match object and must return a replacement string to be used. See :func:`re.sub`. 
@@ -448,15 +448,15 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): `repl` also accepts a callable. n : int, default -1 (all) - Number of replacements to make from start - case : boolean, default None + Number of replacements to make from start. + case : bool, default None - If True, case sensitive (the default if `pat` is a string) - Set to False for case insensitive - Cannot be set if `pat` is a compiled regex flags : int, default 0 (no flags) - re module flags, e.g. re.IGNORECASE - Cannot be set if `pat` is a compiled regex - regex : boolean, default True + regex : bool, default True - If True, assumes the passed-in pattern is a regular expression. - If False, treats the pattern as a literal string - Cannot be set to False if `pat` is a compiled regex or `repl` is @@ -537,6 +537,7 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): Using a compiled regex with flags + >>> import re >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE) >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar') 0 foo @@ -604,6 +605,7 @@ def str_repeat(arr, repeats): 0 a 1 b 2 c + dtype: object Single int repeats string in Series @@ -611,6 +613,7 @@ def str_repeat(arr, repeats): 0 aa 1 bb 2 cc + dtype: object Sequence of int repeats corresponding string in Series @@ -618,6 +621,7 @@ def str_repeat(arr, repeats): 0 a 1 bb 2 ccc + dtype: object """ if is_scalar(repeats): def rep(x): @@ -646,13 +650,14 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan): Parameters ---------- - pat : string - Character sequence or regular expression - case : boolean, default True - If True, case sensitive + pat : str + Character sequence or regular expression. + case : bool, default True + If True, case sensitive. flags : int, default 0 (no flags) - re module flags, e.g. re.IGNORECASE - na : default NaN, fill value for missing values + re module flags, e.g. re.IGNORECASE. + na : default NaN + Fill value for missing values. Returns ------- @@ -768,7 +773,7 @@ def str_extract(arr, pat, flags=0, expand=True): Parameters ---------- - pat : string + pat : str Regular expression pattern with capturing groups. flags : int, default 0 (no flags) Flags from the ``re`` module, e.g. ``re.IGNORECASE``, that @@ -966,21 +971,23 @@ def str_extractall(arr, pat, flags=0): def str_get_dummies(arr, sep='|'): """ - Split each string in the Series by sep and return a frame of - dummy/indicator variables. + Split each string in the Series by sep and return a DataFrame + of dummy/indicator variables. Parameters ---------- - sep : string, default "|" + sep : str, default "|" String to split on. Returns ------- - dummies : DataFrame + DataFrame + Dummy variables corresponding to values of the Series. See Also -------- - get_dummies + get_dummies : Convert categorical variable into dummy/indicator + variables. Examples -------- @@ -1089,11 +1096,11 @@ def str_findall(arr, pat, flags=0): Parameters ---------- - pat : string + pat : str Pattern or regular expression. flags : int, default 0 - ``re`` module flags, e.g. `re.IGNORECASE` (default is 0, which means - no flags). + Flags from ``re`` module, e.g. `re.IGNORECASE` (default is 0, which + means no flags). Returns ------- @@ -1182,17 +1189,18 @@ def str_find(arr, sub, start=0, end=None, side='left'): Parameters ---------- sub : str - Substring being searched + Substring being searched. start : int - Left edge index + Left edge index. end : int - Right edge index + Right edge index. 
side : {'left', 'right'}, default 'left' - Specifies a starting side, equivalent to ``find`` or ``rfind`` + Specifies a starting side, equivalent to ``find`` or ``rfind``. Returns ------- - found : Series/Index of integer values + Series or Index + Indexes where substring is found. """ if not isinstance(sub, compat.string_types): @@ -1430,7 +1438,7 @@ def str_slice_replace(arr, start=None, stop=None, repl=None): Returns ------- - replaced : Series or Index + Series or Index Same type as the original object. See Also @@ -1513,7 +1521,7 @@ def str_strip(arr, to_strip=None, side='both'): Returns ------- - stripped : Series/Index of objects + Series or Index """ if side == 'both': f = lambda x: x.strip(to_strip) @@ -1537,30 +1545,30 @@ def str_wrap(arr, width, **kwargs): Parameters ---------- width : int - Maximum line-width + Maximum line width. expand_tabs : bool, optional - If true, tab characters will be expanded to spaces (default: True) + If True, tab characters will be expanded to spaces (default: True). replace_whitespace : bool, optional - If true, each whitespace character (as defined by string.whitespace) + If True, each whitespace character (as defined by string.whitespace) remaining after tab expansion will be replaced by a single space - (default: True) + (default: True). drop_whitespace : bool, optional - If true, whitespace that, after wrapping, happens to end up at the - beginning or end of a line is dropped (default: True) + If True, whitespace that, after wrapping, happens to end up at the + beginning or end of a line is dropped (default: True). break_long_words : bool, optional - If true, then words longer than width will be broken in order to ensure + If True, then words longer than width will be broken in order to ensure that no lines are longer than width. If it is false, long words will - not be broken, and some lines may be longer than width. (default: True) + not be broken, and some lines may be longer than width (default: True). break_on_hyphens : bool, optional - If true, wrapping will occur preferably on whitespace and right after + If True, wrapping will occur preferably on whitespace and right after hyphens in compound words, as it is customary in English. If false, only whitespaces will be considered as potentially good places for line breaks, but you need to set break_long_words to false if you want truly - insecable words. (default: True) + insecable words (default: True). Returns ------- - wrapped : Series/Index of objects + Series or Index Notes ----- @@ -1581,6 +1589,7 @@ def str_wrap(arr, width, **kwargs): >>> s.str.wrap(12) 0 line to be\nwrapped 1 another line\nto be\nwrapped + dtype: object """ kwargs['width'] = width @@ -1613,7 +1622,7 @@ def str_translate(arr, table, deletechars=None): Returns ------- - translated : Series/Index of objects + Series or Index """ if deletechars is None: f = lambda x: x.translate(table) @@ -1641,15 +1650,16 @@ def str_get(arr, i): Returns ------- - items : Series/Index of objects + Series or Index Examples -------- >>> s = pd.Series(["String", - (1, 2, 3), - ["a", "b", "c"], - 123, -456, - {1:"Hello", "2":"World"}]) + ... (1, 2, 3), + ... ["a", "b", "c"], + ... 123, + ... -456, + ... 
{1: "Hello", "2": "World"}]) >>> s 0 String 1 (1, 2, 3) @@ -1674,7 +1684,7 @@ def str_get(arr, i): 2 c 3 NaN 4 NaN - 5 NaN + 5 None dtype: object """ def f(x): @@ -1699,7 +1709,7 @@ def str_decode(arr, encoding, errors="strict"): Returns ------- - decoded : Series/Index of objects + Series or Index """ if encoding in _cpython_optimized_decoders: # CPython optimized implementation @@ -2091,7 +2101,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): Returns ------- - concat : str or Series/Index of objects + str, Series or Index If `others` is None, `str` is returned, otherwise a `Series/Index` (same type as caller) of objects is returned. diff --git a/pandas/io/formats/terminal.py b/pandas/io/formats/terminal.py index bb34259d710c7..cf2383955d593 100644 --- a/pandas/io/formats/terminal.py +++ b/pandas/io/formats/terminal.py @@ -15,6 +15,7 @@ import os import shutil +import subprocess from pandas.compat import PY3 @@ -94,22 +95,29 @@ def _get_terminal_size_tput(): # get terminal width # src: http://stackoverflow.com/questions/263890/how-do-i-find-the-width # -height-of-a-terminal-window + try: - import subprocess proc = subprocess.Popen(["tput", "cols"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) - output = proc.communicate(input=None) - cols = int(output[0]) + output_cols = proc.communicate(input=None) proc = subprocess.Popen(["tput", "lines"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) - output = proc.communicate(input=None) - rows = int(output[0]) - return (cols, rows) + output_rows = proc.communicate(input=None) except OSError: return None + try: + # Some terminals (e.g. spyder) may report a terminal size of '', + # making the `int` fail. + + cols = int(output_cols[0]) + rows = int(output_rows[0]) + return cols, rows + except (ValueError, IndexError): + return None + def _get_terminal_size_linux(): def ioctl_GWINSZ(fd): diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index bc37317f72802..31e81a9ca77c2 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -6,6 +6,7 @@ import numpy as np import pytest +import pytz from pandas.compat import product @@ -647,6 +648,28 @@ def test_at_time(self): rs = ts.at_time('16:00') assert len(rs) == 0 + @pytest.mark.parametrize('hour', ['1:00', '1:00AM', time(1), + time(1, tzinfo=pytz.UTC)]) + def test_at_time_errors(self, hour): + # GH 24043 + dti = pd.date_range('2018', periods=3, freq='H') + df = pd.DataFrame(list(range(len(dti))), index=dti) + if getattr(hour, 'tzinfo', None) is None: + result = df.at_time(hour) + expected = df.iloc[1:2] + tm.assert_frame_equal(result, expected) + else: + with pytest.raises(ValueError, match="Index must be timezone"): + df.at_time(hour) + + def test_at_time_tz(self): + # GH 24043 + dti = pd.date_range('2018', periods=3, freq='H', tz='US/Pacific') + df = pd.DataFrame(list(range(len(dti))), index=dti) + result = df.at_time(time(4, tzinfo=pytz.timezone('US/Eastern'))) + expected = df.iloc[1:2] + tm.assert_frame_equal(result, expected) + def test_at_time_raises(self): # GH20725 df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index f120402e6e8ca..b645073fcf72a 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -834,3 +834,14 @@ def demean_rename(x): tm.assert_frame_equal(result, expected) result_single = df.groupby('group').value.transform(demean_rename) 
tm.assert_series_equal(result_single, expected['value']) + + +@pytest.mark.parametrize('func', [min, max, np.min, np.max, 'first', 'last']) +def test_groupby_transform_timezone_column(func): + # GH 24198 + ts = pd.to_datetime('now', utc=True).tz_convert('Asia/Singapore') + result = pd.DataFrame({'end_time': [ts], 'id': [1]}) + result['max_end_time'] = result.groupby('id').end_time.transform(func) + expected = pd.DataFrame([[ts, 1, ts]], columns=['end_time', 'id', + 'max_end_time']) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py index 055763bf62d6e..45c5e982c1c48 100644 --- a/pandas/tests/io/formats/test_console.py +++ b/pandas/tests/io/formats/test_console.py @@ -1,6 +1,9 @@ +import subprocess # noqa: F401 + import pytest from pandas.io.formats.console import detect_console_encoding +from pandas.io.formats.terminal import _get_terminal_size_tput class MockEncoding(object): # TODO(py27): replace with mock @@ -72,3 +75,19 @@ def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale): context.setattr('sys.stdout', MockEncoding(std)) context.setattr('sys.getdefaultencoding', lambda: 'sysDefaultEncoding') assert detect_console_encoding() == 'sysDefaultEncoding' + + +@pytest.mark.parametrize("size", ['', ['']]) +def test_terminal_unknown_dimensions(monkeypatch, size): + mock = pytest.importorskip("unittest.mock") + + def communicate(*args, **kwargs): + return size + + monkeypatch.setattr('subprocess.Popen', mock.Mock()) + monkeypatch.setattr('subprocess.Popen.return_value.returncode', None) + monkeypatch.setattr( + 'subprocess.Popen.return_value.communicate', communicate) + result = _get_terminal_size_tput() + + assert result is None diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 173f719edd465..8520855d14918 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -960,6 +960,27 @@ def test_min_max(self): assert np.isnan(_min) assert _max == 1 + def test_min_max_numeric_only(self): + # TODO deprecate numeric_only argument for Categorical and use + # skipna as well, see GH25303 + cat = Series(Categorical( + ["a", "b", np.nan, "a"], categories=['b', 'a'], ordered=True)) + + _min = cat.min() + _max = cat.max() + assert np.isnan(_min) + assert _max == "a" + + _min = cat.min(numeric_only=True) + _max = cat.max(numeric_only=True) + assert _min == "b" + assert _max == "a" + + _min = cat.min(numeric_only=False) + _max = cat.max(numeric_only=False) + assert np.isnan(_min) + assert _max == "a" + class TestSeriesMode(object): # Note: the name TestSeriesMode indicates these tests diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 69acf4ba6bde8..97f1e07380ef9 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -549,3 +549,25 @@ def test_selection_api_validation(): exp.index.name = 'd' assert_frame_equal(exp, df.resample('2D', level='d').sum()) + + +@pytest.mark.parametrize('col_name', ['t2', 't2x', 't2q', 'T_2M', + 't2p', 't2m', 't2m1', 'T2M']) +def test_agg_with_datetime_index_list_agg_func(col_name): + # GH 22660 + # The parametrized column names would get converted to dates by our + # date parser. Some would result in OutOfBoundsError (ValueError) while + # others would result in OverflowError when passed into Timestamp. 
+ # We catch these errors and move on to the correct branch. + df = pd.DataFrame(list(range(200)), + index=pd.date_range(start='2017-01-01', freq='15min', + periods=200, tz='Europe/Berlin'), + columns=[col_name]) + result = df.resample('1d').aggregate(['mean']) + expected = pd.DataFrame([47.5, 143.5, 195.5], + index=pd.date_range(start='2017-01-01', freq='D', + periods=3, tz='Europe/Berlin'), + columns=pd.MultiIndex(levels=[[col_name], + ['mean']], + codes=[[0], [0]])) + assert_frame_equal(result, expected) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index abf95b276cda1..43747ea8621d9 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -9,7 +9,7 @@ from pandas import ( DatetimeIndex, Index, NaT, Period, Series, Timedelta, TimedeltaIndex, - Timestamp) + Timestamp, isna) from pandas.core.arrays import PeriodArray from pandas.util import testing as tm @@ -201,9 +201,10 @@ def _get_overlap_public_nat_methods(klass, as_tuple=False): "fromtimestamp", "isocalendar", "isoformat", "isoweekday", "month_name", "now", "replace", "round", "strftime", "strptime", "time", "timestamp", "timetuple", "timetz", - "to_datetime64", "to_pydatetime", "today", "toordinal", - "tz_convert", "tz_localize", "tzname", "utcfromtimestamp", - "utcnow", "utcoffset", "utctimetuple", "weekday"]), + "to_datetime64", "to_numpy", "to_pydatetime", "today", + "toordinal", "tz_convert", "tz_localize", "tzname", + "utcfromtimestamp", "utcnow", "utcoffset", "utctimetuple", + "weekday"]), (Timedelta, ["total_seconds"]) ]) def test_overlap_public_nat_methods(klass, expected): @@ -339,3 +340,11 @@ def test_nat_arithmetic_td64_vector(op_name, box): def test_nat_pinned_docstrings(): # see gh-17327 assert NaT.ctime.__doc__ == datetime.ctime.__doc__ + + +def test_to_numpy_alias(): + # GH 24653: alias .to_numpy() for scalars + expected = NaT.to_datetime64() + result = NaT.to_numpy() + + assert isna(expected) and isna(result) diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 7d5b479810205..bf71c37aa9c3d 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -414,6 +414,11 @@ def test_timedelta_conversions(self): assert (Timedelta(timedelta(days=1)) == np.timedelta64(1, 'D').astype('m8[ns]')) + def test_to_numpy_alias(self): + # GH 24653: alias .to_numpy() for scalars + td = Timedelta('10m7s') + assert td.to_timedelta64() == td.to_numpy() + def test_round(self): t1 = Timedelta('1 days 02:34:56.789123456') diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index c27ef3d0662c8..f42fad4c925f0 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -969,3 +969,8 @@ def test_to_period_tz_warning(self): with tm.assert_produces_warning(UserWarning): # warning that timezone info will be lost ts.to_period('D') + + def test_to_numpy_alias(self): + # GH 24653: alias .to_numpy() for scalars + ts = Timestamp(datetime.now()) + assert ts.to_datetime64() == ts.to_numpy()
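For context on the headline change in this patch, a minimal sketch of the new scalar ``to_numpy`` alias, assuming a pandas build with this diff applied (the values below are illustrative and not part of the patch):

import numpy as np
import pandas as pd

ts = pd.Timestamp('2019-03-01 12:00')
td = pd.Timedelta('10m7s')

# to_numpy() aliases to_datetime64()/to_timedelta64(); the dtype and copy
# arguments exist only for API compatibility and do not affect the result.
assert ts.to_numpy() == ts.to_datetime64()
assert td.to_numpy() == td.to_timedelta64()
assert isinstance(ts.to_numpy(), np.datetime64)
assert np.isnat(pd.NaT.to_numpy())  # NaT round-trips as a NumPy 'NaT'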
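Likewise, a brief sketch of the timezone-aware ``at_time`` support added above; the zones and times are illustrative and mirror the new tests:

from datetime import time

import pandas as pd
import pytz

dti = pd.date_range('2018', periods=3, freq='H', tz='US/Pacific')
df = pd.DataFrame(list(range(3)), index=dti)

# 4:00 US/Eastern corresponds to 1:00 US/Pacific, so the second row matches.
df.at_time(time(4, tzinfo=pytz.timezone('US/Eastern')))

# Passing an aware time against a tz-naive index now raises a ValueError.
naive = pd.DataFrame(list(range(3)),
                     index=pd.date_range('2018', periods=3, freq='H'))
try:
    naive.at_time(time(4, tzinfo=pytz.UTC))
except ValueError as err:
    print(err)  # Index must be timezone aware.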
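Finally, a hedged sketch of the groupby-transform timezone fix (GH 24198); the timestamp here is arbitrary:

import pandas as pd

ts = pd.Timestamp('2019-02-14 18:00', tz='Asia/Singapore')
df = pd.DataFrame({'end_time': [ts], 'id': [1]})

# Before this fix the transformed column came back timezone naive; with the
# patch applied the Asia/Singapore timezone is preserved.
out = df.groupby('id').end_time.transform('first')
assert out.dt.tz is not None
assert out.iloc[0] == ts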