Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DOC/CLN: Fix errors in DataFrame docstrings #24952

Merged
merged 13 commits into from
Feb 14, 2019
115 changes: 61 additions & 54 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1074,7 +1074,7 @@ def from_dict(cls, data, orient='columns', dtype=None, columns=None):

Returns
-------
pandas.DataFrame
DataFrame

See Also
--------
Expand Down Expand Up @@ -1154,7 +1154,7 @@ def to_numpy(self, dtype=None, copy=False):

Returns
-------
array : numpy.ndarray
numpy.ndarray

See Also
--------
Expand Down Expand Up @@ -1445,7 +1445,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None,

Returns
-------
df : DataFrame
DataFrame
"""

# Make a copy of the input columns so we can modify it
Expand Down Expand Up @@ -1760,7 +1760,7 @@ def from_items(cls, items, columns=None, orient='columns'):

Returns
-------
frame : DataFrame
DataFrame
"""

warnings.warn("from_items is deprecated. Please use "
Expand Down Expand Up @@ -1871,7 +1871,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True,

Returns
-------
y : DataFrame
DataFrame

See Also
--------
Expand Down Expand Up @@ -1961,7 +1961,7 @@ def to_panel(self):

Returns
-------
panel : Panel
Panel
"""
# only support this kind for now
if (not isinstance(self.index, MultiIndex) or # pragma: no cover
Expand Down Expand Up @@ -2521,7 +2521,7 @@ def memory_usage(self, index=True, deep=False):

Returns
-------
sizes : Series
Series
A Series whose index is the original column names and whose values
are the memory usage of each column in bytes.

Expand Down Expand Up @@ -2739,7 +2739,7 @@ def get_value(self, index, col, takeable=False):

Returns
-------
value : scalar value
scalar value
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be preferable to state the type of scalar being returned here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would something like object be appropriate in cases like these where the type is mostly unspecified?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. What you've suggested seems like the most logical thing to me!

cc @datapythonista

"""

warnings.warn("get_value is deprecated and will be removed "
Expand Down Expand Up @@ -2784,7 +2784,7 @@ def set_value(self, index, col, value, takeable=False):

Returns
-------
frame : DataFrame
DataFrame
If label pair is contained, will be a reference to the calling
DataFrame, otherwise a new object.
"""
Expand Down Expand Up @@ -3021,7 +3021,7 @@ def query(self, expr, inplace=False, **kwargs):

Returns
-------
q : DataFrame
DataFrame

See Also
--------
Expand Down Expand Up @@ -3204,7 +3204,7 @@ def select_dtypes(self, include=None, exclude=None):

Returns
-------
subset : DataFrame
DataFrame
The subset of the frame including the dtypes in ``include`` and
excluding the dtypes in ``exclude``.

Expand Down Expand Up @@ -3569,7 +3569,7 @@ def _sanitize_column(self, key, value, broadcast=True):

Returns
-------
sanitized_column : numpy-array
numpy-array
dsaxton marked this conversation as resolved.
Show resolved Hide resolved
"""

def reindexer(value):
Expand Down Expand Up @@ -3838,7 +3838,7 @@ def drop(self, labels=None, axis=0, index=None, columns=None,

Returns
-------
dropped : pandas.DataFrame
DataFrame

Raises
------
Expand Down Expand Up @@ -3963,7 +3963,7 @@ def rename(self, *args, **kwargs):

Returns
-------
renamed : DataFrame
DataFrame

See Also
--------
Expand Down Expand Up @@ -4630,7 +4630,7 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False):

Returns
-------
deduplicated : DataFrame
DataFrame
"""
if self.empty:
return self.copy()
Expand Down Expand Up @@ -4664,7 +4664,7 @@ def duplicated(self, subset=None, keep='first'):

Returns
-------
duplicated : Series
Series
"""
from pandas.core.sorting import get_group_index
from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT
Expand Down Expand Up @@ -5032,7 +5032,7 @@ def swaplevel(self, i=-2, j=-1, axis=0):

Returns
-------
swapped : same type as caller (new object)
same type as caller (new object)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm I think when we've done this for other objects we've said something like DataFrame or Series (or whatever is applicable) and left Same type as caller as a comment

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I switched this to DataFrame, since that should always be the type of the caller here


.. versionchanged:: 0.18.1

Expand Down Expand Up @@ -5153,7 +5153,7 @@ def combine(self, other, func, fill_value=None, overwrite=True):

Returns
-------
result : DataFrame
DataFrame

See Also
--------
Expand Down Expand Up @@ -5311,7 +5311,7 @@ def combine_first(self, other):

Returns
-------
combined : DataFrame
DataFrame

See Also
--------
Expand Down Expand Up @@ -5672,7 +5672,7 @@ def pivot(self, index=None, columns=None, values=None):

Returns
-------
table : DataFrame
DataFrame

See Also
--------
Expand Down Expand Up @@ -5959,7 +5959,7 @@ def unstack(self, level=-1, fill_value=None):

Returns
-------
unstacked : DataFrame or Series
DataFrame or Series
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Super nitpicky but would be nice to order these the same way whenever used. A quick grep on codebase tells me Series or DataFrame appears more often so maybe stick with that ordering

@datapythonista something to consider

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like that idea, consistency is good wherever possible in my opinion


See Also
--------
Expand Down Expand Up @@ -6125,7 +6125,7 @@ def diff(self, periods=1, axis=0):

Returns
-------
diffed : DataFrame
DataFrame

See Also
--------
Expand Down Expand Up @@ -6397,7 +6397,7 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None,

Returns
-------
applied : Series or DataFrame
Series or DataFrame

See Also
--------
Expand Down Expand Up @@ -6590,7 +6590,7 @@ def append(self, other, ignore_index=False,

Returns
-------
appended : DataFrame
DataFrame

See Also
--------
Expand Down Expand Up @@ -6982,12 +6982,13 @@ def corr(self, method='pearson', min_periods=1):

min_periods : int, optional
Minimum number of observations required per pair of columns
to have a valid result. Currently only available for pearson
and spearman correlation
to have a valid result. Currently only available for Pearson
and Spearman correlation.

Returns
-------
y : DataFrame
DataFrame
Correlation matrix.

See Also
--------
Expand All @@ -6996,14 +6997,15 @@ def corr(self, method='pearson', min_periods=1):

Examples
--------
>>> histogram_intersection = lambda a, b: np.minimum(a, b
... ).sum().round(decimals=1)
>>> def histogram_intersection(a, b):
... v = np.minimum(a, b).sum().round(decimals=1)
... return v
>>> df = pd.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)],
... columns=['dogs', 'cats'])
>>> df.corr(method=histogram_intersection)
dogs cats
dogs 1.0 0.3
cats 0.3 1.0
dogs cats
dogs 1.0 0.3
cats 0.3 1.0
"""
numeric_df = self._get_numeric_data()
cols = numeric_df.columns
Expand Down Expand Up @@ -7166,10 +7168,11 @@ def corrwith(self, other, axis=0, drop=False, method='pearson'):
Parameters
----------
other : DataFrame, Series
Object with which to compute correlations.
axis : {0 or 'index', 1 or 'columns'}, default 0
0 or 'index' to compute column-wise, 1 or 'columns' for row-wise
drop : boolean, default False
Drop missing indices from result
0 or 'index' to compute column-wise, 1 or 'columns' for row-wise.
drop : bool, default False
Drop missing indices from result.
method : {'pearson', 'kendall', 'spearman'} or callable
* pearson : standard correlation coefficient
* kendall : Kendall Tau correlation coefficient
Expand All @@ -7181,7 +7184,8 @@ def corrwith(self, other, axis=0, drop=False, method='pearson'):

Returns
-------
correls : Series
Series
Pairwise correlations.

See Also
--------
Expand Down Expand Up @@ -7262,7 +7266,7 @@ def count(self, axis=0, level=None, numeric_only=False):
If the axis is a `MultiIndex` (hierarchical), count along a
particular `level`, collapsing into a `DataFrame`.
A `str` specifies the level name.
numeric_only : boolean, default False
numeric_only : bool, default False
Include only `float`, `int` or `boolean` data.

Returns
Expand Down Expand Up @@ -7510,7 +7514,7 @@ def nunique(self, axis=0, dropna=True):

Returns
-------
nunique : Series
Series

See Also
--------
Expand Down Expand Up @@ -7548,7 +7552,8 @@ def idxmin(self, axis=0, skipna=True):

Returns
-------
idxmin : Series
Series
Indexes of minima along the specified axis.

Raises
------
Expand Down Expand Up @@ -7584,7 +7589,8 @@ def idxmax(self, axis=0, skipna=True):

Returns
-------
idxmax : Series
Series
Indexes of maxima along the specified axis.

Raises
------
Expand Down Expand Up @@ -7731,7 +7737,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True,

Returns
-------
quantiles : Series or DataFrame
Series or DataFrame

- If ``q`` is an array, a DataFrame will be returned where the
index is ``q``, the columns are the columns of self, and the
Expand Down Expand Up @@ -7801,19 +7807,19 @@ def to_timestamp(self, freq=None, how='start', axis=0, copy=True):

Parameters
----------
freq : string, default frequency of PeriodIndex
Desired frequency
freq : str, default frequency of PeriodIndex
Desired frequency.
how : {'s', 'e', 'start', 'end'}
Convention for converting period to timestamp; start of period
vs. end
vs. end.
axis : {0 or 'index', 1 or 'columns'}, default 0
The axis to convert (the index by default)
copy : boolean, default True
If false then underlying input data is not copied
The axis to convert (the index by default).
copy : bool, default True
If False then underlying input data is not copied.

Returns
-------
df : DataFrame with DatetimeIndex
DataFrame with DatetimeIndex
"""
new_data = self._data
if copy:
Expand All @@ -7837,15 +7843,16 @@ def to_period(self, freq=None, axis=0, copy=True):

Parameters
----------
freq : string, default
freq : str, default
Frequency of the PeriodIndex.
axis : {0 or 'index', 1 or 'columns'}, default 0
The axis to convert (the index by default)
copy : boolean, default True
If False then underlying input data is not copied
The axis to convert (the index by default).
copy : bool, default True
If False then underlying input data is not copied.

Returns
-------
ts : TimeSeries with PeriodIndex
DataFrame with PeriodIndex
"""
new_data = self._data
if copy:
Expand Down Expand Up @@ -7918,7 +7925,7 @@ def isin(self, values):
match. Note that 'falcon' does not match based on the number of legs
in df2.

>>> other = pd.DataFrame({'num_legs': [8, 2],'num_wings': [0, 2]},
>>> other = pd.DataFrame({'num_legs': [8, 2], 'num_wings': [0, 2]},
... index=['spider', 'falcon'])
>>> df.isin(other)
num_legs num_wings
Expand Down
Loading