From 2072a58965b3689c2d7e4fc2c953c34883727cff Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 26 Jan 2019 09:47:15 -0600 Subject: [PATCH 01/11] DOC/CLN: Fix errors in DataFrame docstrings --- pandas/core/frame.py | 51 +++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4f79bda25517..512e0c6394b5c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6982,12 +6982,13 @@ def corr(self, method='pearson', min_periods=1): min_periods : int, optional Minimum number of observations required per pair of columns - to have a valid result. Currently only available for pearson - and spearman correlation + to have a valid result. Currently only available for Pearson + and Spearman correlation. Returns ------- y : DataFrame + Correlation matrix as a DataFrame. See Also -------- @@ -6996,14 +6997,15 @@ def corr(self, method='pearson', min_periods=1): Examples -------- - >>> histogram_intersection = lambda a, b: np.minimum(a, b - ... ).sum().round(decimals=1) + >>> def histogram_intersection(a, b): + ... v = np.minimum(a, b).sum().round(decimals=1) + ... return v >>> df = pd.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)], ... columns=['dogs', 'cats']) >>> df.corr(method=histogram_intersection) - dogs cats - dogs 1.0 0.3 - cats 0.3 1.0 + dogs cats + dogs 1.0 0.3 + cats 0.3 1.0 """ numeric_df = self._get_numeric_data() cols = numeric_df.columns @@ -7166,10 +7168,11 @@ def corrwith(self, other, axis=0, drop=False, method='pearson'): Parameters ---------- other : DataFrame, Series + Object with which to compute correlations. axis : {0 or 'index', 1 or 'columns'}, default 0 - 0 or 'index' to compute column-wise, 1 or 'columns' for row-wise - drop : boolean, default False - Drop missing indices from result + 0 or 'index' to compute column-wise, 1 or 'columns' for row-wise. + drop : bool, default False + Drop missing indices from result. 
method : {'pearson', 'kendall', 'spearman'} or callable * pearson : standard correlation coefficient * kendall : Kendall Tau correlation coefficient @@ -7182,6 +7185,7 @@ def corrwith(self, other, axis=0, drop=False, method='pearson'): Returns ------- correls : Series + Series of correlations. See Also ------- @@ -7262,7 +7266,7 @@ def count(self, axis=0, level=None, numeric_only=False): If the axis is a `MultiIndex` (hierarchical), count along a particular `level`, collapsing into a `DataFrame`. A `str` specifies the level name. - numeric_only : boolean, default False + numeric_only : bool, default False Include only `float`, `int` or `boolean` data. Returns @@ -7549,6 +7553,7 @@ def idxmin(self, axis=0, skipna=True): Returns ------- idxmin : Series + Series of indexes of minima along the specified axis. Raises ------ @@ -7585,6 +7590,7 @@ def idxmax(self, axis=0, skipna=True): Returns ------- idxmax : Series + Series of indexes of maxima along the specified axis. Raises ------ @@ -7801,15 +7807,15 @@ def to_timestamp(self, freq=None, how='start', axis=0, copy=True): Parameters ---------- - freq : string, default frequency of PeriodIndex - Desired frequency + freq : str, default frequency of PeriodIndex + Desired frequency. how : {'s', 'e', 'start', 'end'} Convention for converting period to timestamp; start of period - vs. end + vs. end. axis : {0 or 'index', 1 or 'columns'}, default 0 - The axis to convert (the index by default) - copy : boolean, default True - If false then underlying input data is not copied + The axis to convert (the index by default). + copy : bool, default True + If False then underlying input data is not copied. Returns ------- @@ -7837,11 +7843,12 @@ def to_period(self, freq=None, axis=0, copy=True): Parameters ---------- - freq : string, default + freq : str, default + Frequency of the PeriodIndex. 
axis : {0 or 'index', 1 or 'columns'}, default 0 - The axis to convert (the index by default) - copy : boolean, default True - If False then underlying input data is not copied + The axis to convert (the index by default). + copy : bool, default True + If False then underlying input data is not copied. Returns ------- @@ -7918,7 +7925,7 @@ def isin(self, values): match. Note that 'falcon' does not match based on the number of legs in df2. - >>> other = pd.DataFrame({'num_legs': [8, 2],'num_wings': [0, 2]}, + >>> other = pd.DataFrame({'num_legs': [8, 2], 'num_wings': [0, 2]}, ... index=['spider', 'falcon']) >>> df.isin(other) num_legs num_wings From 967c243b7b15784f387a24e064731b1b7e3a509a Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 26 Jan 2019 16:05:03 -0600 Subject: [PATCH 02/11] Change string to str --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2b97661fe9ec3..e2367ab76b2d0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2938,7 +2938,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, will treat them as non-numeric. quotechar : str, default '\"' String of length 1. Character used to quote fields. - line_terminator : string, optional + line_terminator : str, optional The newline character or character sequence to use in the output file. Defaults to `os.linesep`, which depends on the OS in which this method is called ('\n' for linux, '\r\n' for Windows, i.e.). 
From 55510dfcaad66e9f6b7d5f5026678817b3bdd5e8 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 26 Jan 2019 16:14:00 -0600 Subject: [PATCH 03/11] Add more fixes --- pandas/core/generic.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e2367ab76b2d0..4b48444abac5d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -530,7 +530,7 @@ def set_axis(self, labels, axis=0, inplace=None): The axis to update. The value 0 identifies the rows, and 1 identifies the columns. - inplace : boolean, default None + inplace : bool, default None Whether to return a new %(klass)s instance. .. warning:: @@ -763,7 +763,7 @@ def pop(self, item): Parameters ---------- item : str - Column label to be popped + Column label to be popped. Returns ------- @@ -926,7 +926,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): Parameters ---------- - i, j : int, string (can be mixed) + i, j : int, str (can be mixed) Level of index to be swapped. Can pass level name as string. Returns @@ -962,9 +962,9 @@ def rename(self, *args, **kwargs): and raise on DataFrame or Panel. dict-like or functions are transformations to apply to that axis' values - copy : boolean, default True - Also copy underlying data - inplace : boolean, default False + copy : bool, default True + Also copy underlying data. + inplace : bool, default False Whether to return a new %(klass)s. If True then value of copy is ignored. 
level : int or level name, default None @@ -977,7 +977,7 @@ def rename(self, *args, **kwargs): See Also -------- - pandas.NDFrame.rename_axis + NDFrame.rename_axis Examples -------- From 2bd26a238b1c1aa1d9d5dfab32cc978d82a5b2fe Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 26 Jan 2019 18:46:02 -0600 Subject: [PATCH 04/11] Update DataFrame docstrings --- pandas/core/frame.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 512e0c6394b5c..18261fb40ec1e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6987,8 +6987,8 @@ def corr(self, method='pearson', min_periods=1): Returns ------- - y : DataFrame - Correlation matrix as a DataFrame. + DataFrame + Correlation matrix. See Also -------- @@ -7184,8 +7184,8 @@ def corrwith(self, other, axis=0, drop=False, method='pearson'): Returns ------- - correls : Series - Series of correlations. + Series + Pairwise correlations. See Also ------- @@ -7552,8 +7552,8 @@ def idxmin(self, axis=0, skipna=True): Returns ------- - idxmin : Series - Series of indexes of minima along the specified axis. + Series + Indexes of minima along the specified axis. Raises ------ @@ -7589,8 +7589,8 @@ def idxmax(self, axis=0, skipna=True): Returns ------- - idxmax : Series - Series of indexes of maxima along the specified axis. + Series + Indexes of maxima along the specified axis. 
Raises ------ @@ -7819,7 +7819,7 @@ def to_timestamp(self, freq=None, how='start', axis=0, copy=True): Returns ------- - df : DataFrame with DatetimeIndex + DataFrame with DatetimeIndex """ new_data = self._data if copy: From 8149a483ec700fb6e37165b8940ed175b4a668c5 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 26 Jan 2019 19:00:16 -0600 Subject: [PATCH 05/11] Fix returns sections --- pandas/core/frame.py | 54 ++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 18261fb40ec1e..d5071208ed5cd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1074,7 +1074,7 @@ def from_dict(cls, data, orient='columns', dtype=None, columns=None): Returns ------- - pandas.DataFrame + DataFrame See Also -------- @@ -1154,7 +1154,7 @@ def to_numpy(self, dtype=None, copy=False): Returns ------- - array : numpy.ndarray + numpy.ndarray See Also -------- @@ -1445,7 +1445,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, Returns ------- - df : DataFrame + DataFrame """ # Make a copy of the input columns so we can modify it @@ -1760,7 +1760,7 @@ def from_items(cls, items, columns=None, orient='columns'): Returns ------- - frame : DataFrame + DataFrame """ warnings.warn("from_items is deprecated. Please use " @@ -1871,7 +1871,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, Returns ------- - y : DataFrame + DataFrame See Also -------- @@ -1961,7 +1961,7 @@ def to_panel(self): Returns ------- - panel : Panel + Panel """ # only support this kind for now if (not isinstance(self.index, MultiIndex) or # pragma: no cover @@ -2521,7 +2521,7 @@ def memory_usage(self, index=True, deep=False): Returns ------- - sizes : Series + Series A Series whose index is the original column names and whose values is the memory usage of each column in bytes. 
@@ -2739,7 +2739,7 @@ def get_value(self, index, col, takeable=False): Returns ------- - value : scalar value + scalar value """ warnings.warn("get_value is deprecated and will be removed " @@ -2784,7 +2784,7 @@ def set_value(self, index, col, value, takeable=False): Returns ------- - frame : DataFrame + DataFrame If label pair is contained, will be reference to calling DataFrame, otherwise a new object """ @@ -3021,7 +3021,7 @@ def query(self, expr, inplace=False, **kwargs): Returns ------- - q : DataFrame + DataFrame See Also -------- @@ -3204,7 +3204,7 @@ def select_dtypes(self, include=None, exclude=None): Returns ------- - subset : DataFrame + DataFrame The subset of the frame including the dtypes in ``include`` and excluding the dtypes in ``exclude``. @@ -3569,7 +3569,7 @@ def _sanitize_column(self, key, value, broadcast=True): Returns ------- - sanitized_column : numpy-array + numpy-array """ def reindexer(value): @@ -3838,7 +3838,7 @@ def drop(self, labels=None, axis=0, index=None, columns=None, Returns ------- - dropped : pandas.DataFrame + DataFrame Raises ------ @@ -3963,7 +3963,7 @@ def rename(self, *args, **kwargs): Returns ------- - renamed : DataFrame + DataFrame See Also -------- @@ -4630,7 +4630,7 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False): Returns ------- - deduplicated : DataFrame + DataFrame """ if self.empty: return self.copy() @@ -4664,7 +4664,7 @@ def duplicated(self, subset=None, keep='first'): Returns ------- - duplicated : Series + Series """ from pandas.core.sorting import get_group_index from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT @@ -5032,7 +5032,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): Returns ------- - swapped : same type as caller (new object) + same type as caller (new object) .. 
versionchanged:: 0.18.1 @@ -5153,7 +5153,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): Returns ------- - result : DataFrame + DataFrame See Also -------- @@ -5311,7 +5311,7 @@ def combine_first(self, other): Returns ------- - combined : DataFrame + DataFrame See Also -------- @@ -5672,7 +5672,7 @@ def pivot(self, index=None, columns=None, values=None): Returns ------- - table : DataFrame + DataFrame See Also -------- @@ -5959,7 +5959,7 @@ def unstack(self, level=-1, fill_value=None): Returns ------- - unstacked : DataFrame or Series + DataFrame or Series See Also -------- @@ -6125,7 +6125,7 @@ def diff(self, periods=1, axis=0): Returns ------- - diffed : DataFrame + DataFrame See Also -------- @@ -6397,7 +6397,7 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, Returns ------- - applied : Series or DataFrame + Series or DataFrame See Also -------- @@ -6590,7 +6590,7 @@ def append(self, other, ignore_index=False, Returns ------- - appended : DataFrame + DataFrame See Also -------- @@ -7514,7 +7514,7 @@ def nunique(self, axis=0, dropna=True): Returns ------- - nunique : Series + Series See Also -------- @@ -7737,7 +7737,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, Returns ------- - quantiles : Series or DataFrame + Series or DataFrame - If ``q`` is an array, a DataFrame will be returned where the index is ``q``, the columns are the columns of self, and the @@ -7852,7 +7852,7 @@ def to_period(self, freq=None, axis=0, copy=True): Returns ------- - ts : TimeSeries with PeriodIndex + TimeSeries with PeriodIndex """ new_data = self._data if copy: From 22c5d5ca1488f63ec22f6dce2e3d50bf38060697 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sat, 26 Jan 2019 21:23:57 -0600 Subject: [PATCH 06/11] Update pandas/core/frame.py Co-Authored-By: dsaxton --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d5071208ed5cd..52e9093b519a1 
100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3569,7 +3569,7 @@ def _sanitize_column(self, key, value, broadcast=True): Returns ------- - numpy-array + numpy.ndarray """ def reindexer(value): From 9df0170c0ea91d2fd86ddefeff3f8149ffbb84c2 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 26 Jan 2019 21:29:51 -0600 Subject: [PATCH 07/11] Order types --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d5071208ed5cd..497b5f6bdc27b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5959,7 +5959,7 @@ def unstack(self, level=-1, fill_value=None): Returns ------- - DataFrame or Series + Series or DataFrame See Also -------- From 14a5e40976c3487d37534ac42eb1e00753727bca Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 26 Jan 2019 21:32:35 -0600 Subject: [PATCH 08/11] Change swaplevel return type --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 593dd9d51701f..b20fa5485c5cc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5032,7 +5032,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): Returns ------- - same type as caller (new object) + DataFrame .. versionchanged:: 0.18.1 From 7a9a3e90ebc0279946254dbd059bc2ca45edd34b Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 26 Jan 2019 21:44:41 -0600 Subject: [PATCH 09/11] Edit pop docstring --- pandas/core/generic.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4b48444abac5d..87d341cd293e3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -763,18 +763,18 @@ def pop(self, item): Parameters ---------- item : str - Column label to be popped. + Label of column to be popped. Returns ------- - popped : Series + Series Examples -------- - >>> df = pd.DataFrame([('falcon', 'bird', 389.0), - ... 
('parrot', 'bird', 24.0), - ... ('lion', 'mammal', 80.5), - ... ('monkey', 'mammal', np.nan)], + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ... ('parrot', 'bird', 24.0), + ... ('lion', 'mammal', 80.5), + ... ('monkey','mammal', np.nan)], ... columns=('name', 'class', 'max_speed')) >>> df name class max_speed From c4ec33ba03bc85637856d1de76d3a441f79d7c3b Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 26 Jan 2019 21:49:09 -0600 Subject: [PATCH 10/11] Set boolean to bool --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 87d341cd293e3..7796f612edaec 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10270,7 +10270,7 @@ def _doc_parms(cls): Parameters ---------- axis : %(axis_descr)s -skipna : boolean, default True +skipna : bool, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA level : int or level name, default None @@ -10279,7 +10279,7 @@ def _doc_parms(cls): ddof : int, default 1 Delta Degrees of Freedom. The divisor used in calculations is N - ddof, where N represents the number of elements. -numeric_only : boolean, default None +numeric_only : bool, default None Include only float, int, boolean columns. If None, will attempt to use everything, then use only numeric data. Not implemented for Series. From 3bfb5925d46223bf50dbc28fadecfae5186aac42 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 28 Jan 2019 21:45:25 -0600 Subject: [PATCH 11/11] Remove pandas in DataFrame.cov See Also --- pandas/core/frame.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b20fa5485c5cc..ed15705a44d1b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7080,10 +7080,10 @@ def cov(self, min_periods=None): See Also -------- - pandas.Series.cov : Compute covariance with another Series. 
- pandas.core.window.EWM.cov: Exponential weighted sample covariance. - pandas.core.window.Expanding.cov : Expanding sample covariance. - pandas.core.window.Rolling.cov : Rolling sample covariance. + Series.cov : Compute covariance with another Series. + core.window.EWM.cov : Exponentially weighted sample covariance. + core.window.Expanding.cov : Expanding sample covariance. + core.window.Rolling.cov : Rolling sample covariance. Notes -----