Skip to content

Commit

Permalink
Docs: Add ModelHub documentation (#6591)
Browse files Browse the repository at this point in the history
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: rusty1s <matthias.fey@tu-dortmund.de>
  • Loading branch information
3 people authored Feb 8, 2023
1 parent d8d2d71 commit 6e7ba4a
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 111 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Added `cat` aggregation type to the `HeteroConv` class so that features can be concatenated during grouping ([#6634](https://github.com/pyg-team/pytorch_geometric/pull/6634))
- Added `torch.compile` support and benchmark study ([#6610](https://github.com/pyg-team/pytorch_geometric/pull/6610))
- Added the `AntiSymmetricConv` layer ([#6577](https://github.com/pyg-team/pytorch_geometric/pull/6577))
- Added a mixin for Huggingface model hub integration ([#5930](https://github.com/pyg-team/pytorch_geometric/pull/5930))
- Added a mixin for Huggingface model hub integration ([#5930](https://github.com/pyg-team/pytorch_geometric/pull/5930), [#6591](https://github.com/pyg-team/pytorch_geometric/pull/6591))
- Added support for accelerated GNN layers in `nn.conv.cugraph` via `cugraph-ops` ([#6278](https://github.com/pyg-team/pytorch_geometric/pull/6278), [#6388](https://github.com/pyg-team/pytorch_geometric/pull/6388), [#6412](https://github.com/pyg-team/pytorch_geometric/pull/6412))
- Added accelerated `index_sort` function from `pyg-lib` for faster sorting ([#6554](https://github.com/pyg-team/pytorch_geometric/pull/6554))
- Fix incorrect device in `EquilibriumAggregation` ([#6560](https://github.com/pyg-team/pytorch_geometric/pull/6560))
Expand Down
10 changes: 7 additions & 3 deletions docs/source/modules/nn.rst
Original file line number Diff line number Diff line change
Expand Up @@ -289,11 +289,15 @@ DataParallel Layers

.. automodule:: torch_geometric.nn.data_parallel
:members:
:undoc-members:
:exclude-members: training

Model Hub
---------

.. automodule:: torch_geometric.nn.model_hub
:members:

Model Summary
-------------

.. autofunction:: torch_geometric.nn.summary.summary
.. automodule:: torch_geometric.nn.summary
:members:
2 changes: 2 additions & 0 deletions torch_geometric/nn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .to_hetero_with_bases_transformer import to_hetero_with_bases
from .to_fixed_size_transformer import to_fixed_size
from .encoding import PositionalEncoding
from .model_hub import PyGModelHubMixin
from .summary import summary

from .aggr import * # noqa
Expand All @@ -26,5 +27,6 @@
'to_hetero_with_bases',
'to_fixed_size',
'PositionalEncoding',
'PyGModelHubMixin',
'summary',
]
190 changes: 83 additions & 107 deletions torch_geometric/nn/model_hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,61 +17,55 @@


class PyGModelHubMixin(ModelHubMixin):
r"""
Mixin for saving and loading models to
`Huggingface Model Hub <https://huggingface.co/docs/hub/index>`.
Sample code for saving a :obj:`Node2Vec` model to the model hub:
r"""A mixin for saving and loading models to the
`Huggingface Model Hub <https://huggingface.co/docs/hub/index>`_.
.. code-block:: python
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import Node2Vec
from torch_geometric.nn.model_hub import PyGModelHubMixin
# Define your class with the mixin:
class N2V(Node2Vec, PyGModelHubMixin):
def __init__(self,model_name, dataset_name, model_kwargs):
Node2Vec.__init__(self,**model_kwargs)
PyGModelHubMixin.__init__(self, model_name,
dataset_name, model_kwargs)
# instantiate your model:
n2v = N2V(model_name='node2vec',
dataset_name='Cora', model_kwargs=dict(
edge_index=data.edge_index, embedding_dim=128,
walk_length=20, context_size=10, walks_per_node=10,
num_negative_samples=1, p=1, q=1, sparse=True))
# train model
...
# push to Huggingface:
repo_id = ... # your repo id
n2v.save_pretrained(local_file_path, push_to_hub=True,
repo_id=repo_id)
# Load the model for inference:
# The required arguments are the repo id/local folder, and any model
# initialisation arguments that are not native python types (e.g
# Node2Vec requires the edge_index argument which is a tensor--
# this is not saved in model hub)
model = N2V.from_pretrained(repo_id,
model_name='node2vec', dataset_name='Cora',
edge_index=data.edge_index)
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import Node2Vec, PyGModelHubMixin
# Define your class with the mixin:
class N2V(Node2Vec, PyGModelHubMixin):
def __init__(self,model_name, dataset_name, model_kwargs):
Node2Vec.__init__(self,**model_kwargs)
PyGModelHubMixin.__init__(self, model_name,
dataset_name, model_kwargs)
# Instantiate your model:
n2v = N2V(model_name='node2vec',
dataset_name='Cora', model_kwargs=dict(
edge_index=data.edge_index, embedding_dim=128,
walk_length=20, context_size=10, walks_per_node=10,
num_negative_samples=1, p=1, q=1, sparse=True))
# Train the model:
...
# Push to the HuggingFace hub:
repo_id = ... # your repo id
n2v.save_pretrained(
local_file_path,
push_to_hub=True,
repo_id=repo_id,
)
..note::
At the moment the model card is fairly basic. Override the
:obj:`construct_model_card` method if you want a more detailed
model card
# Load the model for inference:
# The required arguments are the repo id/local folder, and any model
# initialisation arguments that are not native python types (e.g
# Node2Vec requires the edge_index argument which is not stored in the
# model hub).
model = N2V.from_pretrained(
repo_id,
model_name='node2vec',
dataset_name='Cora',
edge_index=data.edge_index,
)
Args:
model_name (str): Name of the model shown on the model card
on hugging face hub.
model_name (str): Name of the model.
dataset_name (str): Name of the dataset the model was trained against.
model_kwargs (Dict): Arguments to initialise the Pyg model.
model_kwargs (Dict[str, Any]): The arguments to initialise the model.
"""
def __init__(self, model_name: str, dataset_name: str, model_kwargs: Dict):
ModelHubMixin.__init__(self)
Expand Down Expand Up @@ -100,32 +94,25 @@ def construct_model_card(self, model_name: str, dataset_name: str) -> Any:
return card

def _save_pretrained(self, save_directory: Union[Path, str]):
    r"""Saves the model weights to a local directory.

    Internal hook invoked by :obj:`huggingface_hub.ModelHubMixin` during
    :meth:`save_pretrained`.

    Args:
        save_directory (str or Path): The local directory in which to save
            the model's :obj:`state_dict` (written to a file named by
            :obj:`MODEL_WEIGHTS_NAME`).
    """
    path = os.path.join(save_directory, MODEL_WEIGHTS_NAME)
    # NOTE(review): the `module` check presumably unwraps a wrapped model
    # (e.g., `nn.DataParallel`) so that the raw weights are saved — confirm.
    model_to_save = self.module if hasattr(self, "module") else self
    torch.save(model_to_save.state_dict(), path)

def save_pretrained(self, save_directory: Union[str, Path],
push_to_hub: bool = False,
repo_id: Optional[str] = None, **kwargs):
r"""
Save a trained model to a local directory or to huggingface model hub.
r"""Save a trained model to a local directory or to the HuggingFace
model hub.
Args:
save_directory (str, Path): The directory where weights are saved,
to a file called :obj:`"model.pth"`.
push_to_hub(bool): If :obj:`True`, push the model to the
model hub. (default: :obj:`False`)
save_directory (str): The directory where weights are saved.
push_to_hub (bool, optional): If :obj:`True`, push the model to the
HuggingFace model hub. (default: :obj:`False`)
repo_id (str, optional): The repository name in the hub.
If not provided will default to the name of
:obj:`save_directory` in your namespace.
(default: :obj:`None`)
**kwargs: Additional keyword arguments passed along to
:obj:`huggingface_hub.ModelHubMixin.save_pretrained`.
:obj:`save_directory` in your namespace. (default: :obj:`None`)
**kwargs: Additional keyword arguments passed to
:meth:`huggingface_hub.ModelHubMixin.save_pretrained`.
"""
config = self.model_config
# due to way huggingface hub handles the loading/saving of models,
Expand Down Expand Up @@ -158,7 +145,6 @@ def _from_pretrained(
strict=False,
**model_kwargs,
):
r"""Load trained model."""
map_location = torch.device(map_location)

if os.path.isdir(model_id):
Expand Down Expand Up @@ -200,57 +186,47 @@ def from_pretrained(
cache_dir: Optional[str] = None,
local_files_only: bool = False,
**model_kwargs,
):
r"""
Download and instantiate a model from the Hugging Face Hub.
) -> Any:
r"""Downloads and instantiates a model from the HuggingFace hub.
Args:
pretrained_model_name_or_path (str, Path):
Can be either:
- A string, the `model id` of a pretrained model
hosted inside a model repo on huggingface.co.
Valid model ids can be located at the root-level,
like `bert-base-uncased`, or namespaced under a
user or organization name, like
`dbmdz/bert-base-german-cased`.
- You can add `revision` by appending `@` at the end
of model_id simply like this:
`dbmdz/bert-base-german-cased@main` Revision is
the specific model version to use. It can be a
branch name, a tag name, or a commit id, since we
use a git-based system for storing models and
other artifacts on huggingface.co, so `revision`
can be any identifier allowed by git.
- A path to a `directory` containing model weights
saved using
[`~transformers.PreTrainedModel.save_pretrained`],
e.g., `./my_model_directory/`.
- `None` if you are both providing the configuration
and state dictionary (resp. with keyword arguments
:obj:`config` and :obj:`state_dict`).
force_download (bool): Whether to force the (re-)download of the
model weights and configuration files, overriding the cached
versions if they exist. (default: :obj:`False`)
resume_download (bool): Whether to delete incompletely received
files. Will attempt to resume the download if such a
file exists.(default: :obj:`False`)
pretrained_model_name_or_path (str): Can be either:
- The :obj:`model_id` of a pretrained model hosted inside the
HuggingFace hub.
- You can add a :obj:`revision` by appending :obj:`@` at the
end of :obj:`model_id` to load a specific model version.
- A path to a directory containing the saved model weights.
- :obj:`None` if you are both providing the configuration
:obj:`config` and state dictionary :obj:`state_dict`.
force_download (bool, optional): Whether to force the
(re-)download of the model weights and configuration files,
overriding the cached versions if they exist.
(default: :obj:`False`)
resume_download (bool, optional): Whether to delete incompletely
received files. Will attempt to resume the download if such a
file exists. (default: :obj:`False`)
proxies (Dict[str, str], optional): A dictionary of proxy servers
to use by protocol or endpoint,
e.g.,`{'http': 'foo.bar:3128', 'http://host': 'foo.bar:4012'}`.
to use by protocol or endpoint, *e.g.*,
:obj:`{'http': 'foo.bar:3128', 'http://host': 'foo.bar:4012'}`.
The proxies are used on each request. (default: :obj:`None`)
token (str, bool, optional): The token to use as HTTP bearer
authorization for remote files. If `True`, will use the token
generated when running `transformers-cli login` (stored in
`~/.huggingface`). It is **required** if you
token (str or bool, optional): The token to use as HTTP bearer
authorization for remote files. If set to :obj:`True`, will use
the token generated when running :obj:`transformers-cli login`
(stored in :obj:`~/.huggingface`). It is **required** if you
want to use a private model. (default: :obj:`None`)
cache_dir (str, Path, optional): Path to a directory in which a
cache_dir (str, optional): The path to a directory in which a
downloaded model configuration should be cached if the
standard cache should not be used. (default: :obj:`None`)
local_files_only(bool): Whether to only look at local files
(i.e., do not try to download the model).
local_files_only (bool, optional): Whether to only look at local
files, *i.e.* do not try to download the model.
(default: :obj:`False`)
**model_kwargs: Keyword arguments passed along to
model during initialization. (default: :obj:`None`)
**model_kwargs: Additional keyword arguments passed to the
model during initialization.
"""
return super().from_pretrained(
pretrained_model_name_or_path,
Expand Down

0 comments on commit 6e7ba4a

Please sign in to comment.