Skip to content

Commit

Permalink
DOC: correct docstring examples (pandas-dev#3439) (pandas-dev#16432)
Browse files Browse the repository at this point in the history
  • Loading branch information
ProsperousHeart authored and stangirala committed Jun 11, 2017
1 parent c701c65 commit 105f5b8
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 80 deletions.
9 changes: 9 additions & 0 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ if [ "$DOC" ]; then
git remote -v

git push origin gh-pages -f

echo "Running doctests"
cd "$TRAVIS_BUILD_DIR"
pytest --doctest-modules \
pandas/core/reshape/concat.py \
pandas/core/reshape/pivot.py \
pandas/core/reshape/reshape.py \
pandas/core/reshape/tile.py

fi

exit 0
2 changes: 2 additions & 0 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
0
a 2
>>> pd.concat([df5, df6], verify_integrity=True)
Traceback (most recent call last):
...
ValueError: Indexes have overlapping values: ['a']
"""
op = _Concatenator(objs, axis=axis, join_axes=join_axes,
Expand Down
72 changes: 41 additions & 31 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,26 +50,36 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
Examples
--------
>>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo",
... "bar", "bar", "bar", "bar"],
... "B": ["one", "one", "one", "two", "two",
... "one", "one", "two", "two"],
... "C": ["small", "large", "large", "small",
... "small", "large", "small", "small",
... "large"],
... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]})
>>> df
A B C D
0 foo one small 1
1 foo one large 2
2 foo one large 2
3 foo two small 3
4 foo two small 3
5 bar one large 4
6 bar one small 5
7 bar two small 6
8 bar two large 7
A B C D
0 foo one small 1
1 foo one large 2
2 foo one large 2
3 foo two small 3
4 foo two small 3
5 bar one large 4
6 bar one small 5
7 bar two small 6
8 bar two large 7
>>> table = pivot_table(df, values='D', index=['A', 'B'],
... columns=['C'], aggfunc=np.sum)
>>> table
small large
foo one 1 4
two 6 NaN
bar one 5 4
two 6 7
... # doctest: +NORMALIZE_WHITESPACE
C large small
A B
bar one 4.0 5.0
two 7.0 6.0
foo one 4.0 1.0
two NaN 6.0
Returns
-------
Expand Down Expand Up @@ -445,27 +455,27 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
Examples
--------
>>> a
array([foo, foo, foo, foo, bar, bar,
bar, bar, foo, foo, foo], dtype=object)
>>> b
array([one, one, one, two, one, one,
one, two, two, two, one], dtype=object)
>>> c
array([dull, dull, shiny, dull, dull, shiny,
shiny, dull, shiny, shiny, shiny], dtype=object)
>>> crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
b one two
c dull shiny dull shiny
>>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar",
... "bar", "bar", "foo", "foo", "foo"], dtype=object)
>>> b = np.array(["one", "one", "one", "two", "one", "one",
... "one", "two", "two", "two", "one"], dtype=object)
>>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny",
... "shiny", "dull", "shiny", "shiny", "shiny"],
... dtype=object)
>>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
... # doctest: +NORMALIZE_WHITESPACE
b one two
c dull shiny dull shiny
a
bar 1 2 1 0
foo 2 2 1 2
bar 1 2 1 0
foo 2 2 1 2
>>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])
>>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f'])
>>> crosstab(foo, bar) # 'c' and 'f' are not represented in the data,
# but they still will be counted in the output
... # but they still will be counted in the output
... # doctest: +SKIP
col_0 d e f
row_0
a 1 0 0
Expand Down
81 changes: 43 additions & 38 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,23 @@ class _Unstacker(object):
>>> import pandas as pd
>>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
... ('two', 'a'), ('two', 'b')])
>>> s = pd.Series(np.arange(1.0, 5.0), index=index)
>>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index)
>>> s
one a 1
b 2
two a 3
b 4
dtype: float64
one a 1
b 2
two a 3
b 4
dtype: int64
>>> s.unstack(level=-1)
a b
a b
one 1 2
two 3 4
>>> s.unstack(level=0)
one two
a 1 2
b 3 4
a 1 3
b 2 4
Returns
-------
Expand Down Expand Up @@ -789,18 +789,18 @@ def lreshape(data, groups, dropna=True, label=None):
>>> import pandas as pd
>>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526],
... 'team': ['Red Sox', 'Yankees'],
... 'year1': [2007, 2008], 'year2': [2008, 2008]})
... 'year1': [2007, 2007], 'year2': [2008, 2008]})
>>> data
hr1 hr2 team year1 year2
0 514 545 Red Sox 2007 2008
1 573 526 Yankees 2007 2008
>>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']})
team hr year
0 Red Sox 514 2007
1 Yankees 573 2007
2 Red Sox 545 2008
3 Yankees 526 2008
team year hr
0 Red Sox 2007 514
1 Yankees 2007 573
2 Red Sox 2008 545
3 Yankees 2008 526
Returns
-------
Expand Down Expand Up @@ -905,11 +905,12 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
... })
>>> df["id"] = df.index
>>> df
A1970 A1980 B1970 B1980 X id
A1970 A1980 B1970 B1980 X id
0 a d 2.5 3.2 -1.085631 0
1 b e 1.2 1.3 0.997345 1
2 c f 0.7 0.1 0.282978 2
>>> pd.wide_to_long(df, ["A", "B"], i="id", j="year")
... # doctest: +NORMALIZE_WHITESPACE
X A B
id year
0 1970 -1.085631 a 2.5
Expand Down Expand Up @@ -940,6 +941,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
8 3 3 2.1 2.9
>>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age')
>>> l
... # doctest: +NORMALIZE_WHITESPACE
ht
famid birth age
1 1 1 2.8
Expand Down Expand Up @@ -979,41 +981,44 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
Less wieldy column names are also handled
>>> np.random.seed(0)
>>> df = pd.DataFrame({'A(quarterly)-2010': np.random.rand(3),
... 'A(quarterly)-2011': np.random.rand(3),
... 'B(quarterly)-2010': np.random.rand(3),
... 'B(quarterly)-2011': np.random.rand(3),
... 'X' : np.random.randint(3, size=3)})
>>> df['id'] = df.index
>>> df
A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 B(quarterly)-2011
0 0.531828 0.724455 0.322959 0.293714
1 0.634401 0.611024 0.361789 0.630976
2 0.849432 0.722443 0.228263 0.092105
\
>>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 ...
0 0.548814 0.544883 0.437587 ...
1 0.715189 0.423655 0.891773 ...
2 0.602763 0.645894 0.963663 ...
X id
0 0 0
1 1 1
2 2 2
>>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'],
i='id', j='year', sep='-')
X A(quarterly) B(quarterly)
2 1 2
>>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id',
... j='year', sep='-')
... # doctest: +NORMALIZE_WHITESPACE
X A(quarterly) B(quarterly)
id year
0 2010 0 0.531828 0.322959
1 2010 2 0.634401 0.361789
2 2010 2 0.849432 0.228263
0 2011 0 0.724455 0.293714
1 2011 2 0.611024 0.630976
2 2011 2 0.722443 0.092105
0 2010 0 0.548814 0.437587
1 2010 1 0.715189 0.891773
2 2010 1 0.602763 0.963663
0 2011 0 0.544883 0.383442
1 2011 1 0.423655 0.791725
2 2011 1 0.645894 0.528895
If we have many columns, we could also use a regex to find our
stubnames and pass that list on to wide_to_long
>>> stubnames = set([match[0] for match in
df.columns.str.findall('[A-B]\(.*\)').values
if match != [] ])
>>> stubnames = sorted(
... set([match[0] for match in df.columns.str.findall(
... r'[A-B]\(.*\)').values if match != [] ])
... )
>>> list(stubnames)
['B(quarterly)', 'A(quarterly)']
['A(quarterly)', 'B(quarterly)']
Notes
-----
Expand Down Expand Up @@ -1133,7 +1138,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
2 0 0 1
>>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'],
'C': [1, 2, 3]})
... 'C': [1, 2, 3]})
>>> pd.get_dummies(df, prefix=['col1', 'col2'])
C col1_a col1_b col2_a col2_b col2_c
Expand All @@ -1149,7 +1154,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
3 1 0 0
4 1 0 0
>>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True))
>>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True)
b c
0 0 0
1 1 0
Expand Down
24 changes: 13 additions & 11 deletions pandas/core/reshape/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,18 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
Examples
--------
>>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, retbins=True)
([(0.191, 3.367], (0.191, 3.367], (0.191, 3.367], (3.367, 6.533],
(6.533, 9.7], (0.191, 3.367]]
Categories (3, object): [(0.191, 3.367] < (3.367, 6.533] < (6.533, 9.7]],
array([ 0.1905 , 3.36666667, 6.53333333, 9.7 ]))
... # doctest: +ELLIPSIS
([(0.19, 3.367], (0.19, 3.367], (0.19, 3.367], (3.367, 6.533], ...
Categories (3, interval[float64]): [(0.19, 3.367] < (3.367, 6.533] ...
>>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3,
labels=["good","medium","bad"])
>>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]),
... 3, labels=["good", "medium", "bad"])
... # doctest: +SKIP
[good, good, good, medium, bad, good]
Categories (3, object): [good < medium < bad]
>>> pd.cut(np.ones(5), 4, labels=False)
array([1, 1, 1, 1, 1], dtype=int64)
array([1, 1, 1, 1, 1])
"""
# NOTE: this binning code is changed a bit from histogram for var(x) == 0

Expand Down Expand Up @@ -182,15 +182,17 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
Examples
--------
>>> pd.qcut(range(5), 4)
[[0, 1], [0, 1], (1, 2], (2, 3], (3, 4]]
Categories (4, object): [[0, 1] < (1, 2] < (2, 3] < (3, 4]]
... # doctest: +ELLIPSIS
[(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]]
Categories (4, interval[float64]): [(-0.001, 1.0] < (1.0, 2.0] ...
>>> pd.qcut(range(5), 3, labels=["good","medium","bad"])
>>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"])
... # doctest: +SKIP
[good, good, medium, bad, bad]
Categories (3, object): [good < medium < bad]
>>> pd.qcut(range(5), 4, labels=False)
array([0, 0, 1, 2, 3], dtype=int64)
array([0, 0, 1, 2, 3])
"""
x_is_series, series_index, name, x = _preprocess_for_cut(x)

Expand Down

0 comments on commit 105f5b8

Please sign in to comment.