DOC: Fix warnings in doc build (#22838)
TomAugspurger authored Sep 26, 2018
1 parent 9df8065 commit a03d953
Showing 14 changed files with 88 additions and 64 deletions.
9 changes: 9 additions & 0 deletions doc/source/api.rst
@@ -2603,3 +2603,12 @@ objects.
generated/pandas.Series.ix
generated/pandas.Series.imag
generated/pandas.Series.real


+ .. Can't convince sphinx to generate toctree for this class attribute.
+ .. So we do it manually to avoid a warning
+ .. toctree::
+    :hidden:
+
+    generated/pandas.api.extensions.ExtensionDtype.na_value
2 changes: 1 addition & 1 deletion doc/source/basics.rst
@@ -1935,7 +1935,7 @@ NumPy's type-system for a few cases.
* :ref:`Categorical <categorical>`
* :ref:`Datetime with Timezone <timeseries.timezone_series>`
* :ref:`Period <timeseries.periods>`
- * :ref:`Interval <advanced.indexing.intervallindex>`
+ * :ref:`Interval <indexing.intervallindex>`

Pandas uses the ``object`` dtype for storing strings.

6 changes: 2 additions & 4 deletions doc/source/cookbook.rst
@@ -505,13 +505,11 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
.. ipython:: python
df = pd.DataFrame({'A' : [1, 1, 2, 2], 'B' : [1, -1, 1, 2]})
gb = df.groupby('A')
def replace(g):
-     mask = g < 0
-     g.loc[mask] = g[~mask].mean()
-     return g
+     mask = g < 0
+     return g.where(mask, g[~mask].mean())
gb.transform(replace)
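Note that ``where(cond, other)`` keeps values where ``cond`` is True and substitutes ``other`` elsewhere, so the committed one-liner keeps the negative values and overwrites the non-negative ones, inverting the original recipe. A self-contained sketch that preserves the intended behavior (the ``~mask`` is an editorial correction, not part of the committed line):

.. code-block:: python

   import pandas as pd

   df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, -1, 1, 2]})
   gb = df.groupby('A')

   def replace(g):
       # Replace negative values with the mean of the non-negative ones;
       # where() keeps entries where the condition holds, hence ~mask.
       mask = g < 0
       return g.where(~mask, g[~mask].mean())

   gb.transform(replace)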
8 changes: 4 additions & 4 deletions doc/source/ecosystem.rst
@@ -73,8 +73,8 @@ large data to thin clients.
`seaborn <https://seaborn.pydata.org>`__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- Seaborn is a Python visualization library based on `matplotlib
- <http://matplotlib.org>`__. It provides a high-level, dataset-oriented
+ Seaborn is a Python visualization library based on
+ `matplotlib <http://matplotlib.org>`__. It provides a high-level, dataset-oriented
interface for creating attractive statistical graphics. The plotting functions
in seaborn understand pandas objects and leverage pandas grouping operations
internally to support concise specification of complex visualizations. Seaborn
@@ -140,7 +140,7 @@ which are utilized by Jupyter Notebook for displaying
(Note: HTML tables may or may not be
compatible with non-HTML Jupyter output formats.)

- See :ref:`Options and Settings <options>` and :ref:`<options.available>`
+ See :ref:`Options and Settings <options>` and :ref:`Available Options <options.available>`
for pandas ``display.`` settings.

`quantopian/qgrid <https://github.com/quantopian/qgrid>`__
@@ -169,7 +169,7 @@ or the clipboard into a new pandas DataFrame via a sophisticated import wizard.
Most pandas classes, methods and data attributes can be autocompleted in
Spyder's `Editor <https://docs.spyder-ide.org/editor.html>`__ and
`IPython Console <https://docs.spyder-ide.org/ipythonconsole.html>`__,
- and Spyder's `Help pane<https://docs.spyder-ide.org/help.html>`__ can retrieve
+ and Spyder's `Help pane <https://docs.spyder-ide.org/help.html>`__ can retrieve
and render Numpydoc documentation on pandas objects in rich text with Sphinx
both automatically and on-demand.

29 changes: 13 additions & 16 deletions doc/source/io.rst
@@ -66,16 +66,13 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
CSV & Text files
----------------

- The two workhorse functions for reading text files (a.k.a. flat files) are
- :func:`read_csv` and :func:`read_table`. They both use the same parsing code to
- intelligently convert tabular data into a ``DataFrame`` object. See the
- :ref:`cookbook<cookbook.csv>` for some advanced strategies.
+ The workhorse function for reading text files (a.k.a. flat files) is
+ :func:`read_csv`. See the :ref:`cookbook<cookbook.csv>` for some advanced strategies.
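Callers migrating from ``read_table`` get the same behavior from ``read_csv`` with an explicit delimiter; a minimal sketch, assuming a tab-separated file named ``data.tsv``:

.. code-block:: python

   import pandas as pd

   # Equivalent to the old pd.read_table('data.tsv'):
   df = pd.read_csv('data.tsv', sep='\t')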

Parsing options
'''''''''''''''

- The functions :func:`read_csv` and :func:`read_table` accept the following
- common arguments:
+ :func:`read_csv` accepts the following common arguments:

Basic
+++++
@@ -780,8 +777,8 @@ Date Handling
Specifying Date Columns
+++++++++++++++++++++++

- To better facilitate working with datetime data, :func:`read_csv` and
- :func:`read_table` use the keyword arguments ``parse_dates`` and ``date_parser``
+ To better facilitate working with datetime data, :func:`read_csv`
+ uses the keyword arguments ``parse_dates`` and ``date_parser``
to allow users to specify a variety of columns and date/time formats to turn the
input text data into ``datetime`` objects.
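As a quick illustration of those keywords (the file and column names here are hypothetical):

.. code-block:: python

   import pandas as pd

   # Parse the 'timestamp' column into datetime64 values while reading
   df = pd.read_csv('events.csv', parse_dates=['timestamp'])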

@@ -1434,7 +1431,7 @@ Suppose you have data indexed by two columns:
print(open('data/mindex_ex.csv').read())
- The ``index_col`` argument to ``read_csv`` and ``read_table`` can take a list of
+ The ``index_col`` argument to ``read_csv`` can take a list of
column numbers to turn multiple columns into a ``MultiIndex`` for the index of the
returned object:
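The example block is elided in this view; roughly, it reads the file printed above with the first two columns as index levels:

.. code-block:: python

   import pandas as pd

   # Columns 0 and 1 become levels of a MultiIndex
   df = pd.read_csv('data/mindex_ex.csv', index_col=[0, 1])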

@@ -1505,8 +1502,8 @@ class of the csv module. For this, you have to specify ``sep=None``.
.. ipython:: python
print(open('tmp2.sv').read())
pd.read_csv('tmp2.sv', sep=None, engine='python')
.. _io.multiple_files:

@@ -1528,16 +1525,16 @@ rather than reading the entire file into memory, such as the following:
.. ipython:: python
print(open('tmp.sv').read())
- table = pd.read_table('tmp.sv', sep='|')
+ table = pd.read_csv('tmp.sv', sep='|')
table
- By specifying a ``chunksize`` to ``read_csv`` or ``read_table``, the return
+ By specifying a ``chunksize`` to ``read_csv``, the return
value will be an iterable object of type ``TextFileReader``:

.. ipython:: python
- reader = pd.read_table('tmp.sv', sep='|', chunksize=4)
+ reader = pd.read_csv('tmp.sv', sep='|', chunksize=4)
reader
for chunk in reader:
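The loop body is truncated in this view; a complete sketch of chunked reading (reusing the pipe-delimited ``tmp.sv`` from the example above):

.. code-block:: python

   import pandas as pd

   # Each iteration yields a DataFrame of at most 4 rows
   reader = pd.read_csv('tmp.sv', sep='|', chunksize=4)
   for chunk in reader:
       print(chunk)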
@@ -1548,7 +1545,7 @@ Specifying ``iterator=True`` will also return the ``TextFileReader`` object:

.. ipython:: python
- reader = pd.read_table('tmp.sv', sep='|', iterator=True)
+ reader = pd.read_csv('tmp.sv', sep='|', iterator=True)
reader.get_chunk(5)
.. ipython:: python
@@ -3067,7 +3064,7 @@ Clipboard

A handy way to grab data is to use the :meth:`~DataFrame.read_clipboard` method,
which takes the contents of the clipboard buffer and passes them to the
- ``read_table`` method. For instance, you can copy the following text to the
+ ``read_csv`` method. For instance, you can copy the following text to the
clipboard (CTRL-C on many operating systems):

.. code-block:: python
5 changes: 3 additions & 2 deletions doc/source/text.rst
@@ -312,14 +312,15 @@ All one-dimensional list-likes can be combined in a list-like container (includi
s
u
- s.str.cat([u.values, ['A', 'B', 'C', 'D'], map(str, u.index)], na_rep='-')
+ s.str.cat([u.values,
+            u.index.astype(str).values], na_rep='-')
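The definitions of ``s`` and ``u`` are elided in this view; a self-contained approximation of the call above (these particular values are stand-ins):

.. code-block:: python

   import pandas as pd

   s = pd.Series(['a', 'b', 'c', 'd'])
   u = pd.Series(['b', 'd', 'a', 'c'], index=[1, 3, 0, 2])

   # Every list-like in the container must match len(s); the plain
   # arrays are aligned positionally.
   s.str.cat([u.values, u.index.astype(str).values], na_rep='-')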
All elements must match in length to the calling ``Series`` (or ``Index``), except those having an index if ``join`` is not None:

.. ipython:: python
v
- s.str.cat([u, v, ['A', 'B', 'C', 'D']], join='outer', na_rep='-')
+ s.str.cat([u, v], join='outer', na_rep='-')
If using ``join='right'`` on a list of ``others`` that contains different indexes,
the union of these indexes will be used as the basis for the final concatenation:
45 changes: 26 additions & 19 deletions doc/source/timeseries.rst
@@ -753,18 +753,28 @@ regularity will result in a ``DatetimeIndex``, although frequency is lost:
Iterating through groups
------------------------

- With the :ref:`Resampler` object in hand, iterating through the grouped data is very
+ With the ``Resampler`` object in hand, iterating through the grouped data is very
natural and functions similarly to :py:func:`itertools.groupby`:

.. ipython:: python
- resampled = df.resample('H')
+ small = pd.Series(
+     range(6),
+     index=pd.to_datetime(['2017-01-01T00:00:00',
+                           '2017-01-01T00:30:00',
+                           '2017-01-01T00:31:00',
+                           '2017-01-01T01:00:00',
+                           '2017-01-01T03:00:00',
+                           '2017-01-01T03:05:00'])
+ )
+ resampled = small.resample('H')
for name, group in resampled:
-     print(name)
-     print(group)
+     print("Group: ", name)
+     print("-" * 27)
+     print(group, end="\n\n")
- See :ref:`groupby.iterating-label`.
+ See :ref:`groupby.iterating-label` or :class:`Resampler.__iter__` for more.

.. _timeseries.components:

@@ -910,26 +920,22 @@ It's definitely worth exploring the ``pandas.tseries.offsets`` module and the
various docstrings for the classes.

These operations (``apply``, ``rollforward`` and ``rollback``) preserve time
- (hour, minute, etc) information by default. To reset time, use ``normalize=True``
- when creating the offset instance. If ``normalize=True``, the result is
- normalized after the function is applied.

+ (hour, minute, etc) information by default. To reset time, use ``normalize``
+ before or after applying the operation (depending on whether you want the
+ time information included in the operation).

.. ipython:: python
+ ts = pd.Timestamp('2014-01-01 09:00')
day = Day()
- day.apply(pd.Timestamp('2014-01-01 09:00'))
- day = Day(normalize=True)
- day.apply(pd.Timestamp('2014-01-01 09:00'))
+ day.apply(ts)
+ day.apply(ts).normalize()
+ ts = pd.Timestamp('2014-01-01 22:00')
hour = Hour()
- hour.apply(pd.Timestamp('2014-01-01 22:00'))
- hour = Hour(normalize=True)
- hour.apply(pd.Timestamp('2014-01-01 22:00'))
- hour.apply(pd.Timestamp('2014-01-01 23:00'))
+ hour.apply(ts)
+ hour.apply(ts).normalize()
+ hour.apply(pd.Timestamp("2014-01-01 23:30")).normalize()
.. _timeseries.dayvscalendarday:

@@ -1488,6 +1494,7 @@ time. The method for this is :meth:`~Series.shift`, which is available on all of
the pandas objects.

.. ipython:: python
+
ts = pd.Series(range(len(rng)), index=rng)
ts = ts[:5]
ts.shift(1)
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.18.0.txt
@@ -373,7 +373,7 @@ New Behavior:
s = pd.Series([1,2,3], index=np.arange(3.))
s
s.index
- print(s.to_csv(path=None))
+ print(s.to_csv(path_or_buf=None, header=False))
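For context: under the 0.24 signature, ``Series.to_csv(path_or_buf=None)`` returns the CSV text, and ``header=False`` reproduces the old headerless output. A sketch (the output comments are an expectation worked out by hand, not captured from a doc build):

.. code-block:: python

   import numpy as np
   import pandas as pd

   s = pd.Series([1, 2, 3], index=np.arange(3.))
   print(s.to_csv(path_or_buf=None, header=False))
   # 0.0,1
   # 1.0,2
   # 2.0,3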

Changes to dtype assignment behaviors
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.0.txt
@@ -186,7 +186,7 @@ Previously, only ``gzip`` compression was supported. By default, compression of
URLs and paths are now inferred using their file extensions. Additionally,
support for bz2 compression in the python 2 C-engine improved (:issue:`14874`).

- .. ipython:: python
+ .. code-block:: python

url = 'https://github.com/{repo}/raw/{branch}/{path}'.format(
repo = 'pandas-dev/pandas',
9 changes: 5 additions & 4 deletions doc/source/whatsnew/v0.24.0.txt
@@ -253,7 +253,6 @@ UTC offset (:issue:`17697`, :issue:`11736`, :issue:`22457`)

.. code-block:: ipython


In [2]: pd.to_datetime("2015-11-18 15:30:00+05:30")
Out[2]: Timestamp('2015-11-18 10:00:00')

@@ -291,6 +290,7 @@ Passing ``utc=True`` will mimic the previous behavior but will correctly indicate
that the dates have been converted to UTC

.. ipython:: python
+
pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True)

.. _whatsnew_0240.api_breaking.calendarday:
@@ -457,7 +457,7 @@ Previous Behavior:
Out[3]: Int64Index([0, 1, 2], dtype='int64')


- .. _whatsnew_0240.api.timedelta64_subtract_nan
+ .. _whatsnew_0240.api.timedelta64_subtract_nan:

Addition/Subtraction of ``NaN`` from :class:`DataFrame`
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -468,9 +468,10 @@ all-``NaT``. This is for compatibility with ``TimedeltaIndex`` and
``Series`` behavior (:issue:`22163`)

.. ipython:: python
+    :okexcept:

df = pd.DataFrame([pd.Timedelta(days=1)])
df - np.nan
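The ``:okexcept:`` option added above is what lets the doc build tolerate the exception this example now raises; a sketch of the 0.24 behavior:

.. code-block:: python

   import numpy as np
   import pandas as pd

   df = pd.DataFrame([pd.Timedelta(days=1)])
   # In 0.24 this raises TypeError instead of returning an all-NaT column,
   # matching TimedeltaIndex and Series behavior (GH 22163).
   df - np.nan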

Previous Behavior:

18 changes: 13 additions & 5 deletions pandas/core/generic.py
@@ -2060,10 +2060,12 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
like.
.. versionadded:: 0.19.0
- compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None},
-     default 'infer'
+ compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
A string representing the compression to use in the output file,
- only used when the first argument is a filename.
+ only used when the first argument is a filename. By default, the
+ compression is inferred from the filename.
.. versionadded:: 0.21.0
.. versionchanged:: 0.24.0
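A sketch of the inferred-compression behavior the new wording describes (the file name is hypothetical):

.. code-block:: python

   import pandas as pd

   df = pd.DataFrame({'a': [1, 2, 3]})
   # With the default compression='infer', the .gz suffix selects gzip
   df.to_json('frame.json.gz')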
@@ -9514,7 +9516,9 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
a string.
.. versionchanged:: 0.24.0
Was previously named "path" for Series.
sep : str, default ','
String of length 1. Field delimiter for the output file.
na_rep : str, default ''
@@ -9528,7 +9532,9 @@
assumed to be aliases for the column names.
.. versionchanged:: 0.24.0
Previously defaulted to False for Series.
index : bool, default True
Write row names (index).
index_label : str or sequence, or False, default None
@@ -9550,7 +9556,9 @@
compression).
.. versionchanged:: 0.24.0
'infer' option added and set to default.
quoting : optional constant from csv module
Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
10 changes: 6 additions & 4 deletions pandas/core/series.py
@@ -2065,10 +2065,10 @@ def autocorr(self, lag=1):
Examples
--------
>>> s = pd.Series([0.25, 0.5, 0.2, -0.05])
- >>> s.autocorr()
- 0.1035526330902407
- >>> s.autocorr(lag=2)
- -0.9999999999999999
+ >>> s.autocorr()  # doctest: +ELLIPSIS
+ 0.10355...
+ >>> s.autocorr(lag=2)  # doctest: +ELLIPSIS
+ -0.99999...
If the Pearson correlation is not well defined, then 'NaN' is returned.
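The ``+ELLIPSIS`` directive is what lets ``...`` stand in for the platform-dependent trailing digits; a minimal demonstration of the mechanism:

.. code-block:: python

   import doctest

   def f():
       """
       >>> 1 / 3  # doctest: +ELLIPSIS
       0.333...
       """

   # Passes: the ellipsis absorbs the remaining digits
   doctest.run_docstring_examples(f, {}, verbose=True)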
@@ -2789,6 +2789,7 @@ def nlargest(self, n=5, keep='first'):
keep : {'first', 'last', 'all'}, default 'first'
When there are duplicate values that cannot all fit in a
Series of `n` elements:
+
- ``first`` : take the first occurrences based on the index order
- ``last`` : take the last occurrences based on the index order
- ``all`` : keep all occurrences. This can result in a Series of
@@ -2884,6 +2885,7 @@ def nsmallest(self, n=5, keep='first'):
keep : {'first', 'last', 'all'}, default 'first'
When there are duplicate values that cannot all fit in a
Series of `n` elements:
+
- ``first`` : take the first occurrences based on the index order
- ``last`` : take the last occurrences based on the index order
- ``all`` : keep all occurrences. This can result in a Series of
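A quick sketch of the tie-handling options described in both docstrings (values chosen so the tie straddles the cutoff):

.. code-block:: python

   import pandas as pd

   s = pd.Series([1, 2, 2, 3])
   s.nsmallest(2)               # two rows: 1 and the first 2
   s.nsmallest(2, keep='all')   # three rows: the tied 2s are both kept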