-
-
Notifications
You must be signed in to change notification settings - Fork 17.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DOC/CLN: Fix errors in DataFrame docstrings #24952
Changes from 5 commits
2072a58
967c243
55510df
2bd26a2
8149a48
22c5d5c
9df0170
83170f0
14a5e40
7a9a3e9
c4ec33b
3bfb592
3ecebbd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1074,7 +1074,7 @@ def from_dict(cls, data, orient='columns', dtype=None, columns=None): | |
|
||
Returns | ||
------- | ||
pandas.DataFrame | ||
DataFrame | ||
|
||
See Also | ||
-------- | ||
|
@@ -1154,7 +1154,7 @@ def to_numpy(self, dtype=None, copy=False): | |
|
||
Returns | ||
------- | ||
array : numpy.ndarray | ||
numpy.ndarray | ||
|
||
See Also | ||
-------- | ||
|
@@ -1445,7 +1445,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, | |
|
||
Returns | ||
------- | ||
df : DataFrame | ||
DataFrame | ||
""" | ||
|
||
# Make a copy of the input columns so we can modify it | ||
|
@@ -1760,7 +1760,7 @@ def from_items(cls, items, columns=None, orient='columns'): | |
|
||
Returns | ||
------- | ||
frame : DataFrame | ||
DataFrame | ||
""" | ||
|
||
warnings.warn("from_items is deprecated. Please use " | ||
|
@@ -1871,7 +1871,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, | |
|
||
Returns | ||
------- | ||
y : DataFrame | ||
DataFrame | ||
|
||
See Also | ||
-------- | ||
|
@@ -1961,7 +1961,7 @@ def to_panel(self): | |
|
||
Returns | ||
------- | ||
panel : Panel | ||
Panel | ||
""" | ||
# only support this kind for now | ||
if (not isinstance(self.index, MultiIndex) or # pragma: no cover | ||
|
@@ -2521,7 +2521,7 @@ def memory_usage(self, index=True, deep=False): | |
|
||
Returns | ||
------- | ||
sizes : Series | ||
Series | ||
A Series whose index is the original column names and whose values | ||
are the memory usage of each column in bytes. | ||
|
||
|
@@ -2739,7 +2739,7 @@ def get_value(self, index, col, takeable=False): | |
|
||
Returns | ||
------- | ||
value : scalar value | ||
scalar value | ||
""" | ||
|
||
warnings.warn("get_value is deprecated and will be removed " | ||
|
@@ -2784,7 +2784,7 @@ def set_value(self, index, col, value, takeable=False): | |
|
||
Returns | ||
------- | ||
frame : DataFrame | ||
DataFrame | ||
If label pair is contained, will be reference to calling DataFrame, | ||
otherwise a new object | ||
""" | ||
|
@@ -3021,7 +3021,7 @@ def query(self, expr, inplace=False, **kwargs): | |
|
||
Returns | ||
------- | ||
q : DataFrame | ||
DataFrame | ||
|
||
See Also | ||
-------- | ||
|
@@ -3204,7 +3204,7 @@ def select_dtypes(self, include=None, exclude=None): | |
|
||
Returns | ||
------- | ||
subset : DataFrame | ||
DataFrame | ||
The subset of the frame including the dtypes in ``include`` and | ||
excluding the dtypes in ``exclude``. | ||
|
||
|
@@ -3569,7 +3569,7 @@ def _sanitize_column(self, key, value, broadcast=True): | |
|
||
Returns | ||
------- | ||
sanitized_column : numpy-array | ||
numpy-array | ||
dsaxton marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
|
||
def reindexer(value): | ||
|
@@ -3838,7 +3838,7 @@ def drop(self, labels=None, axis=0, index=None, columns=None, | |
|
||
Returns | ||
------- | ||
dropped : pandas.DataFrame | ||
DataFrame | ||
|
||
Raises | ||
------ | ||
|
@@ -3963,7 +3963,7 @@ def rename(self, *args, **kwargs): | |
|
||
Returns | ||
------- | ||
renamed : DataFrame | ||
DataFrame | ||
|
||
See Also | ||
-------- | ||
|
@@ -4630,7 +4630,7 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False): | |
|
||
Returns | ||
------- | ||
deduplicated : DataFrame | ||
DataFrame | ||
""" | ||
if self.empty: | ||
return self.copy() | ||
|
@@ -4664,7 +4664,7 @@ def duplicated(self, subset=None, keep='first'): | |
|
||
Returns | ||
------- | ||
duplicated : Series | ||
Series | ||
""" | ||
from pandas.core.sorting import get_group_index | ||
from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT | ||
|
@@ -5032,7 +5032,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): | |
|
||
Returns | ||
------- | ||
swapped : same type as caller (new object) | ||
same type as caller (new object) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, I think when we've done this for other objects we've said something like [inline example lost in extraction]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I switched this to [inline text lost in extraction]. |
||
|
||
.. versionchanged:: 0.18.1 | ||
|
||
|
@@ -5153,7 +5153,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): | |
|
||
Returns | ||
------- | ||
result : DataFrame | ||
DataFrame | ||
|
||
See Also | ||
-------- | ||
|
@@ -5311,7 +5311,7 @@ def combine_first(self, other): | |
|
||
Returns | ||
------- | ||
combined : DataFrame | ||
DataFrame | ||
|
||
See Also | ||
-------- | ||
|
@@ -5672,7 +5672,7 @@ def pivot(self, index=None, columns=None, values=None): | |
|
||
Returns | ||
------- | ||
table : DataFrame | ||
DataFrame | ||
|
||
See Also | ||
-------- | ||
|
@@ -5959,7 +5959,7 @@ def unstack(self, level=-1, fill_value=None): | |
|
||
Returns | ||
------- | ||
unstacked : DataFrame or Series | ||
DataFrame or Series | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Super nitpicky, but it would be nice to order these the same way whenever used. A quick grep on the codebase tells me [inline details lost in extraction]. @datapythonista something to consider. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I like that idea; consistency is good wherever possible, in my opinion. |
||
|
||
See Also | ||
-------- | ||
|
@@ -6125,7 +6125,7 @@ def diff(self, periods=1, axis=0): | |
|
||
Returns | ||
------- | ||
diffed : DataFrame | ||
DataFrame | ||
|
||
See Also | ||
-------- | ||
|
@@ -6397,7 +6397,7 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, | |
|
||
Returns | ||
------- | ||
applied : Series or DataFrame | ||
Series or DataFrame | ||
|
||
See Also | ||
-------- | ||
|
@@ -6590,7 +6590,7 @@ def append(self, other, ignore_index=False, | |
|
||
Returns | ||
------- | ||
appended : DataFrame | ||
DataFrame | ||
|
||
See Also | ||
-------- | ||
|
@@ -6982,12 +6982,13 @@ def corr(self, method='pearson', min_periods=1): | |
|
||
min_periods : int, optional | ||
Minimum number of observations required per pair of columns | ||
to have a valid result. Currently only available for pearson | ||
and spearman correlation | ||
to have a valid result. Currently only available for Pearson | ||
and Spearman correlation. | ||
|
||
Returns | ||
------- | ||
y : DataFrame | ||
DataFrame | ||
Correlation matrix. | ||
|
||
See Also | ||
-------- | ||
|
@@ -6996,14 +6997,15 @@ def corr(self, method='pearson', min_periods=1): | |
|
||
Examples | ||
-------- | ||
>>> histogram_intersection = lambda a, b: np.minimum(a, b | ||
... ).sum().round(decimals=1) | ||
>>> def histogram_intersection(a, b): | ||
... v = np.minimum(a, b).sum().round(decimals=1) | ||
... return v | ||
>>> df = pd.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)], | ||
... columns=['dogs', 'cats']) | ||
>>> df.corr(method=histogram_intersection) | ||
dogs cats | ||
dogs 1.0 0.3 | ||
cats 0.3 1.0 | ||
dogs cats | ||
dogs 1.0 0.3 | ||
cats 0.3 1.0 | ||
""" | ||
numeric_df = self._get_numeric_data() | ||
cols = numeric_df.columns | ||
|
@@ -7166,10 +7168,11 @@ def corrwith(self, other, axis=0, drop=False, method='pearson'): | |
Parameters | ||
---------- | ||
other : DataFrame, Series | ||
Object with which to compute correlations. | ||
axis : {0 or 'index', 1 or 'columns'}, default 0 | ||
0 or 'index' to compute column-wise, 1 or 'columns' for row-wise | ||
drop : boolean, default False | ||
Drop missing indices from result | ||
0 or 'index' to compute column-wise, 1 or 'columns' for row-wise. | ||
drop : bool, default False | ||
Drop missing indices from result. | ||
method : {'pearson', 'kendall', 'spearman'} or callable | ||
* pearson : standard correlation coefficient | ||
* kendall : Kendall Tau correlation coefficient | ||
|
@@ -7181,7 +7184,8 @@ def corrwith(self, other, axis=0, drop=False, method='pearson'): | |
|
||
Returns | ||
------- | ||
correls : Series | ||
Series | ||
Pairwise correlations. | ||
|
||
See Also | ||
-------- | ||
|
@@ -7262,7 +7266,7 @@ def count(self, axis=0, level=None, numeric_only=False): | |
If the axis is a `MultiIndex` (hierarchical), count along a | ||
particular `level`, collapsing into a `DataFrame`. | ||
A `str` specifies the level name. | ||
numeric_only : boolean, default False | ||
numeric_only : bool, default False | ||
Include only `float`, `int` or `boolean` data. | ||
|
||
Returns | ||
|
@@ -7510,7 +7514,7 @@ def nunique(self, axis=0, dropna=True): | |
|
||
Returns | ||
------- | ||
nunique : Series | ||
Series | ||
|
||
See Also | ||
-------- | ||
|
@@ -7548,7 +7552,8 @@ def idxmin(self, axis=0, skipna=True): | |
|
||
Returns | ||
------- | ||
idxmin : Series | ||
Series | ||
Indexes of minima along the specified axis. | ||
|
||
Raises | ||
------ | ||
|
@@ -7584,7 +7589,8 @@ def idxmax(self, axis=0, skipna=True): | |
|
||
Returns | ||
------- | ||
idxmax : Series | ||
Series | ||
Indexes of maxima along the specified axis. | ||
|
||
Raises | ||
------ | ||
|
@@ -7731,7 +7737,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, | |
|
||
Returns | ||
------- | ||
quantiles : Series or DataFrame | ||
Series or DataFrame | ||
|
||
- If ``q`` is an array, a DataFrame will be returned where the | ||
index is ``q``, the columns are the columns of self, and the | ||
|
@@ -7801,19 +7807,19 @@ def to_timestamp(self, freq=None, how='start', axis=0, copy=True): | |
|
||
Parameters | ||
---------- | ||
freq : string, default frequency of PeriodIndex | ||
Desired frequency | ||
freq : str, default frequency of PeriodIndex | ||
Desired frequency. | ||
how : {'s', 'e', 'start', 'end'} | ||
Convention for converting period to timestamp; start of period | ||
vs. end | ||
vs. end. | ||
axis : {0 or 'index', 1 or 'columns'}, default 0 | ||
The axis to convert (the index by default) | ||
copy : boolean, default True | ||
If false then underlying input data is not copied | ||
The axis to convert (the index by default). | ||
copy : bool, default True | ||
If False then underlying input data is not copied. | ||
|
||
Returns | ||
------- | ||
df : DataFrame with DatetimeIndex | ||
DataFrame with DatetimeIndex | ||
""" | ||
new_data = self._data | ||
if copy: | ||
|
@@ -7837,15 +7843,16 @@ def to_period(self, freq=None, axis=0, copy=True): | |
|
||
Parameters | ||
---------- | ||
freq : string, default | ||
freq : str, default | ||
Frequency of the PeriodIndex. | ||
axis : {0 or 'index', 1 or 'columns'}, default 0 | ||
The axis to convert (the index by default) | ||
copy : boolean, default True | ||
If False then underlying input data is not copied | ||
The axis to convert (the index by default). | ||
copy : bool, default True | ||
If False then underlying input data is not copied. | ||
|
||
Returns | ||
------- | ||
ts : TimeSeries with PeriodIndex | ||
TimeSeries with PeriodIndex | ||
""" | ||
new_data = self._data | ||
if copy: | ||
|
@@ -7918,7 +7925,7 @@ def isin(self, values): | |
match. Note that 'falcon' does not match based on the number of legs | ||
in df2. | ||
|
||
>>> other = pd.DataFrame({'num_legs': [8, 2],'num_wings': [0, 2]}, | ||
>>> other = pd.DataFrame({'num_legs': [8, 2], 'num_wings': [0, 2]}, | ||
... index=['spider', 'falcon']) | ||
>>> df.isin(other) | ||
num_legs num_wings | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would be preferable to state the type of scalar being returned here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would something like `object` be appropriate in cases like these where the type is mostly unspecified?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good question. What you've suggested seems like the most logical thing to me!
cc @datapythonista