This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Improves API docs and docstring consistency (#4244)
* refactor py2md

* improve py2md, warn if backticks missing

* ensure backticks consistent

* remove docstring help test

* fixes and handle more edge cases

* add failing test for pydoc-markdown bug

* update pydoc-markdown

* fix some links
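One of the changes listed above is a py2md check that warns when a docstring parameter type is missing backticks. A minimal sketch of such a check (illustrative only; the actual py2md implementation may differ):

```python
import re
import warnings

# Matches a numpydoc-style parameter line such as "params : `Params`".
PARAM = re.compile(r"^(\w+)\s*:\s*(.+)$")

def check_param_line(line: str) -> None:
    """Warn if a parameter's type annotation is not wrapped in backticks."""
    m = PARAM.match(line.strip())
    if m and not m.group(2).startswith("`"):
        warnings.warn(
            f"type of parameter '{m.group(1)}' is not backticked: {m.group(2)}"
        )
```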
epwalsh authored May 18, 2020
1 parent 1b0d231 commit 4de68a4
Showing 112 changed files with 1,072 additions and 817 deletions.
4 changes: 2 additions & 2 deletions Makefile
@@ -4,7 +4,7 @@ MD_DOCS_ROOT = docs/
MD_DOCS_API_ROOT = $(MD_DOCS_ROOT)api/
MD_DOCS_SRC = $(filter-out $(SRC)/__main__.py %/__init__.py $(SRC)/version.py,$(shell find $(SRC) -type f -name '*.py' | grep -v -E 'tests/'))
MD_DOCS = $(subst .py,.md,$(subst $(SRC)/,$(MD_DOCS_API_ROOT),$(MD_DOCS_SRC)))
-MD_DOCS_CMD = python scripts/py2md.py
+MD_DOCS_CMD = python allennlp/tools/py2md.py
MD_DOCS_CONF = mkdocs.yml
MD_DOCS_CONF_SRC = mkdocs-skeleton.yml
MD_DOCS_TGT = site/
@@ -118,7 +118,7 @@ $(MD_DOCS_ROOT)%.md : %.md
$(MD_DOCS_CONF) : $(MD_DOCS_CONF_SRC) $(MD_DOCS)
python scripts/build_docs_config.py $@ $(MD_DOCS_CONF_SRC) $(MD_DOCS_ROOT) $(MD_DOCS_API_ROOT)

-$(MD_DOCS_API_ROOT)%.md : $(SRC)/%.py scripts/py2md.py
+$(MD_DOCS_API_ROOT)%.md : $(SRC)/%.py allennlp/tools/py2md.py
mkdir -p $(shell dirname $@)
$(MD_DOCS_CMD) $(subst /,.,$(subst .py,,$<)) --out $@
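The pattern rule above turns each source file into a dotted module name via `$(subst /,.,$(subst .py,,$<))` and writes the generated page under `$(MD_DOCS_API_ROOT)`. The same mapping expressed in Python (a sketch under our own function name, not part of the project; `str.removesuffix` requires Python 3.9+):

```python
def module_and_doc_path(src_path: str,
                        src_root: str = "allennlp",
                        docs_api_root: str = "docs/api/") -> tuple:
    """Mirror the Makefile's substitutions: path -> dotted module + doc path."""
    # $(subst /,.,$(subst .py,,$<)) : strip ".py", replace "/" with "."
    module = src_path.removesuffix(".py").replace("/", ".")
    # Target path: replace the source root with the API docs root, .py -> .md
    relative = src_path[len(src_root) + 1:].removesuffix(".py")
    return module, docs_api_root + relative + ".md"
```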

48 changes: 1 addition & 47 deletions allennlp/commands/evaluate.py
@@ -2,54 +2,8 @@
The `evaluate` subcommand can be used to
evaluate a trained model against a dataset
and report any metrics calculated by the model.
-$ allennlp evaluate --help
-usage: allennlp evaluate [-h] [--output-file OUTPUT_FILE]
-[--weights-file WEIGHTS_FILE]
-[--cuda-device CUDA_DEVICE] [-o OVERRIDES]
-[--batch-size BATCH_SIZE]
-[--batch-weight-key BATCH_WEIGHT_KEY]
-[--extend-vocab]
-[--embedding-sources-mapping EMBEDDING_SOURCES_MAPPING]
-[--include-package INCLUDE_PACKAGE]
-archive_file input_file
-Evaluate the specified model + dataset
-positional arguments:
-archive_file path to an archived trained model
-input_file path to the file containing the evaluation data
-optional arguments:
--h, --help show this help message and exit
---output-file OUTPUT_FILE
-path to output file
---weights-file WEIGHTS_FILE
-a path that overrides which weights file to use
---cuda-device CUDA_DEVICE
-id of GPU to use (if any)
--o OVERRIDES, --overrides OVERRIDES
-a JSON structure used to override the experiment
-configuration
---batch-size BATCH_SIZE
-If non-empty, the batch size to use during evaluation.
---batch-weight-key BATCH_WEIGHT_KEY
-If non-empty, name of metric used to weight the loss
-on a per-batch basis.
---extend-vocab if specified, we will use the instances in your new
-dataset to extend your vocabulary. If pretrained-file
-was used to initialize embedding layers, you may also
-need to pass --embedding-sources-mapping.
---embedding-sources-mapping EMBEDDING_SOURCES_MAPPING
-a JSON dict defining mapping from embedding module
-path to embedding pretrained-file used during
-training. If not passed, and embedding needs to be
-extended, we will try to use the original file paths
-used during training. If they are not available we
-will use random vectors for embedding extension.
---include-package INCLUDE_PACKAGE
-additional packages to include
"""

import argparse
import json
import logging
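The help text deleted above duplicated argparse output inside the docstring, where it drifts out of sync with the code; argparse can regenerate it from the parser itself. A minimal illustration (a toy parser with a few of the same flags, not AllenNLP's actual parser):

```python
import argparse

# Toy stand-in for the `allennlp evaluate` parser; flags mirror the removed
# docstring text purely for illustration.
parser = argparse.ArgumentParser(
    prog="allennlp evaluate",
    description="Evaluate the specified model + dataset")
parser.add_argument("archive_file", help="path to an archived trained model")
parser.add_argument("input_file",
                    help="path to the file containing the evaluation data")
parser.add_argument("--output-file", help="path to output file")
parser.add_argument("--cuda-device", type=int, default=-1,
                    help="id of GPU to use (if any)")

# Always matches the argument definitions, unlike a hardcoded docstring.
help_text = parser.format_help()
```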
41 changes: 2 additions & 39 deletions allennlp/commands/find_learning_rate.py
@@ -2,43 +2,6 @@
The `find-lr` subcommand can be used to find a good learning rate for a model.
It requires a configuration file and a directory in
which to write the results.
-$ allennlp find-lr --help
-usage: allennlp find-lr [-h] -s SERIALIZATION_DIR [-o OVERRIDES]
-[--start-lr START_LR] [--end-lr END_LR]
-[--num-batches NUM_BATCHES]
-[--stopping-factor STOPPING_FACTOR] [--linear] [-f]
-[--include-package INCLUDE_PACKAGE]
-param_path
-Find a learning rate range where loss decreases quickly for the specified
-model and dataset.
-positional arguments:
-param_path path to parameter file describing the model to be
-trained
-optional arguments:
--h, --help show this help message and exit
--s SERIALIZATION_DIR, --serialization-dir SERIALIZATION_DIR
-The directory in which to save results.
--o OVERRIDES, --overrides OVERRIDES
-a JSON structure used to override the experiment
-configuration
---start-lr START_LR learning rate to start the search (default = 1e-05)
---end-lr END_LR learning rate up to which search is done (default =
-10)
---num-batches NUM_BATCHES
-number of mini-batches to run learning rate finder
-(default = 100)
---stopping-factor STOPPING_FACTOR
-stop the search when the current loss exceeds the best
-loss recorded by multiple of stopping factor
---linear increase learning rate linearly instead of exponential
-increase
--f, --force overwrite the output directory if it exists
---include-package INCLUDE_PACKAGE
-additional packages to include
"""

import argparse
@@ -161,7 +124,7 @@ def find_learning_rate_model(
# Parameters
-params : [`Params`](../common/params.md#params)
+params : `Params`
A parameter object specifying an AllenNLP Experiment.
serialization_dir : `str`
The directory in which to save results.
@@ -266,7 +229,7 @@ def search_learning_rate(
# Parameters
-trainer: [`GradientDescentTrainer`](../training/trainer.md#gradientdescenttrainer)
+trainer: `GradientDescentTrainer`
start_lr : `float`
The learning rate to start the search.
end_lr : `float`
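The two hunks above replace markdown-link annotations such as ``[`Params`](../common/params.md#params)`` with plain backticked names. A small sketch of that normalization (a hypothetical helper, not code from the commit):

```python
import re

# Matches a markdown link whose text is a backticked identifier, e.g.
# [`Params`](../common/params.md#params), keeping only the backticked part.
LINK = re.compile(r"\[(`[^`]+`)\]\([^)]*\)")

def plain_type(annotation: str) -> str:
    """Strip markdown links from a docstring type annotation."""
    return LINK.sub(r"\1", annotation)
```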
45 changes: 1 addition & 44 deletions allennlp/commands/predict.py
@@ -2,51 +2,8 @@
The `predict` subcommand allows you to make bulk JSON-to-JSON
or dataset to JSON predictions using a trained model and its
[`Predictor`](../predictors/predictor.md#predictor) wrapper.
-$ allennlp predict --help
-usage: allennlp predict [-h] [--output-file OUTPUT_FILE]
-[--weights-file WEIGHTS_FILE]
-[--batch-size BATCH_SIZE] [--silent]
-[--cuda-device CUDA_DEVICE] [--use-dataset-reader]
-[--dataset-reader-choice {train,validation}]
-[-o OVERRIDES] [--predictor PREDICTOR]
-[--include-package INCLUDE_PACKAGE]
-archive_file input_file
-Run the specified model against a JSON-lines input file.
-positional arguments:
-archive_file the archived model to make predictions with
-input_file path to or url of the input file
-optional arguments:
--h, --help show this help message and exit
---output-file OUTPUT_FILE
-path to output file
---weights-file WEIGHTS_FILE
-a path that overrides which weights file to use
---batch-size BATCH_SIZE
-The batch size to use for processing
---silent do not print output to stdout
---cuda-device CUDA_DEVICE
-id of GPU to use (if any)
---use-dataset-reader Whether to use the dataset reader of the original
-model to load Instances. The validation dataset reader
-will be used if it exists, otherwise it will fall back
-to the train dataset reader. This behavior can be
-overridden with the --dataset-reader-choice flag.
---dataset-reader-choice {train,validation}
-Indicates which model dataset reader to use if the
---use-dataset-reader flag is set. (default =
-validation)
--o OVERRIDES, --overrides OVERRIDES
-a JSON structure used to override the experiment
-configuration
---predictor PREDICTOR
-optionally specify a specific predictor to use
---include-package INCLUDE_PACKAGE
-additional packages to include
"""

from typing import List, Iterator, Optional
import argparse
import sys
23 changes: 1 addition & 22 deletions allennlp/commands/print_results.py
@@ -1,29 +1,8 @@
"""
The `print-results` subcommand allows you to print results from multiple
allennlp serialization directories to the console in a helpful csv format.
-$ allennlp print-results --help
-usage: allennlp print-results [-h] [-k KEYS [KEYS ...]] [-m METRICS_FILENAME]
-[--include-package INCLUDE_PACKAGE]
-path
-Print results from allennlp training runs in a helpful CSV format.
-positional arguments:
-path Path to recursively search for allennlp serialization
-directories.
-optional arguments:
--h, --help show this help message and exit
--k KEYS [KEYS ...], --keys KEYS [KEYS ...]
-Keys to print from metrics.json.Keys not present in
-all metrics.json will result in "N/A"
--m METRICS_FILENAME, --metrics-filename METRICS_FILENAME
-Name of the metrics file to inspect. (default =
-metrics.json)
---include-package INCLUDE_PACKAGE
-additional packages to include
"""

import argparse
import json
import logging
1 change: 1 addition & 0 deletions allennlp/commands/subcommand.py
@@ -1,6 +1,7 @@
"""
Base class for subcommands under `allennlp.run`.
"""

import argparse
from typing import Callable, Dict, Optional, Type, TypeVar

14 changes: 0 additions & 14 deletions allennlp/commands/test_install.py
@@ -1,20 +1,6 @@
"""
The `test-install` subcommand verifies
an installation by running the unit tests.
-$ allennlp test-install --help
-usage: allennlp test-install [-h] [--run-all] [-k K]
-[--include-package INCLUDE_PACKAGE]
-Test that installation works by running the unit tests.
-optional arguments:
--h, --help show this help message and exit
---run-all By default, we skip tests that are slow or download
-large files. This flag will run all tests.
--k K Limit tests by setting pytest -k argument
---include-package INCLUDE_PACKAGE
-additional packages to include
"""

import argparse
