From dedb82096510039772e599f6e12b3dda645dac53 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 14 Jan 2023 10:01:40 +0100 Subject: [PATCH 1/4] update --- docs/source/_static/css/custom.css | 4 +- docs/source/{notes => advanced}/batching.rst | 0 docs/source/{notes => advanced}/graphgym.rst | 0 docs/source/{notes => advanced}/jit.rst | 0 docs/source/{notes => advanced}/remote.rst | 0 .../{notes => advanced}/sparse_tensor.rst | 0 .../{notes => cheatsheet}/cheatsheet.rst | 0 .../{notes => cheatsheet}/data_cheatsheet.rst | 0 docs/source/{notes => external}/resources.rst | 0 docs/source/{notes => get_started}/colabs.rst | 0 docs/source/get_started/introduction.rst | 437 +++++++++++++++++ docs/source/index.rst | 63 ++- docs/source/install/installation.rst | 185 ++++++++ .../{notes => install}/quick-start.html | 0 docs/source/notes/installation.rst | 186 +------- docs/source/notes/introduction.rst | 438 +----------------- .../{notes => tutorial}/create_dataset.rst | 0 .../source/{notes => tutorial}/create_gnn.rst | 0 docs/source/{notes => tutorial}/explain.rst | 0 .../{notes => tutorial}/heterogeneous.rst | 0 docs/source/{notes => tutorial}/load_csv.rst | 0 21 files changed, 669 insertions(+), 644 deletions(-) rename docs/source/{notes => advanced}/batching.rst (100%) rename docs/source/{notes => advanced}/graphgym.rst (100%) rename docs/source/{notes => advanced}/jit.rst (100%) rename docs/source/{notes => advanced}/remote.rst (100%) rename docs/source/{notes => advanced}/sparse_tensor.rst (100%) rename docs/source/{notes => cheatsheet}/cheatsheet.rst (100%) rename docs/source/{notes => cheatsheet}/data_cheatsheet.rst (100%) rename docs/source/{notes => external}/resources.rst (100%) rename docs/source/{notes => get_started}/colabs.rst (100%) create mode 100644 docs/source/get_started/introduction.rst create mode 100644 docs/source/install/installation.rst rename docs/source/{notes => install}/quick-start.html (100%) rename docs/source/{notes => tutorial}/create_dataset.rst (100%) rename docs/source/{notes => tutorial}/create_gnn.rst (100%) rename docs/source/{notes => tutorial}/explain.rst (100%) rename docs/source/{notes => tutorial}/heterogeneous.rst (100%) rename docs/source/{notes => tutorial}/load_csv.rst (100%) diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css index 1ebb9929df4e..f2e5c555360b 100644 --- a/docs/source/_static/css/custom.css +++ b/docs/source/_static/css/custom.css @@ -1,6 +1,6 @@ /* Monospace font in "Package Reference" navigation */ -.wy-menu-vertical > ul:last-child li.toctree-l1 > a, -#pyg-documentation div.toctree-wrapper:last-child ul { +.wy-menu-vertical > ul:nth-child(10) li.toctree-l1 > a, +#pyg-documentation div.toctree-wrapper:nth-child(9) ul { font-family: SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace; } diff --git a/docs/source/notes/batching.rst b/docs/source/advanced/batching.rst similarity index 100% rename from docs/source/notes/batching.rst rename to docs/source/advanced/batching.rst diff --git a/docs/source/notes/graphgym.rst b/docs/source/advanced/graphgym.rst similarity index 100% rename from docs/source/notes/graphgym.rst rename to docs/source/advanced/graphgym.rst diff --git a/docs/source/notes/jit.rst b/docs/source/advanced/jit.rst similarity index 100% rename from docs/source/notes/jit.rst rename to docs/source/advanced/jit.rst diff --git a/docs/source/notes/remote.rst b/docs/source/advanced/remote.rst similarity index 100% rename from docs/source/notes/remote.rst rename to 
docs/source/advanced/remote.rst diff --git a/docs/source/notes/sparse_tensor.rst b/docs/source/advanced/sparse_tensor.rst similarity index 100% rename from docs/source/notes/sparse_tensor.rst rename to docs/source/advanced/sparse_tensor.rst diff --git a/docs/source/notes/cheatsheet.rst b/docs/source/cheatsheet/cheatsheet.rst similarity index 100% rename from docs/source/notes/cheatsheet.rst rename to docs/source/cheatsheet/cheatsheet.rst diff --git a/docs/source/notes/data_cheatsheet.rst b/docs/source/cheatsheet/data_cheatsheet.rst similarity index 100% rename from docs/source/notes/data_cheatsheet.rst rename to docs/source/cheatsheet/data_cheatsheet.rst diff --git a/docs/source/notes/resources.rst b/docs/source/external/resources.rst similarity index 100% rename from docs/source/notes/resources.rst rename to docs/source/external/resources.rst diff --git a/docs/source/notes/colabs.rst b/docs/source/get_started/colabs.rst similarity index 100% rename from docs/source/notes/colabs.rst rename to docs/source/get_started/colabs.rst diff --git a/docs/source/get_started/introduction.rst b/docs/source/get_started/introduction.rst new file mode 100644 index 000000000000..4ef0c3a3a7cc --- /dev/null +++ b/docs/source/get_started/introduction.rst @@ -0,0 +1,437 @@ +Introduction by Example +======================= + +We shortly introduce the fundamental concepts of PyG through self-contained examples. +At its core, PyG provides the following main features: + +.. contents:: + :local: + +Data Handling of Graphs +----------------------- + +A graph is used to model pairwise relations (edges) between objects (nodes). +A single graph in PyG is described by an instance of :class:`torch_geometric.data.Data`, which holds the following attributes by default: + +- :obj:`data.x`: Node feature matrix with shape :obj:`[num_nodes, num_node_features]` +- :obj:`data.edge_index`: Graph connectivity in `COO format `_ with shape :obj:`[2, num_edges]` and type :obj:`torch.long` +- :obj:`data.edge_attr`: Edge feature matrix with shape :obj:`[num_edges, num_edge_features]` +- :obj:`data.y`: Target to train against (may have arbitrary shape), *e.g.*, node-level targets of shape :obj:`[num_nodes, *]` or graph-level targets of shape :obj:`[1, *]` +- :obj:`data.pos`: Node position matrix with shape :obj:`[num_nodes, num_dimensions]` + +None of these attributes are required. +In fact, the :class:`~torch_geometric.data.Data` object is not even restricted to these attributes. +We can, *e.g.*, extend it by :obj:`data.face` to save the connectivity of triangles from a 3D mesh in a tensor with shape :obj:`[3, num_faces]` and type :obj:`torch.long`. + +.. Note:: + PyTorch and :obj:`torchvision` define an example as a tuple of an image and a target. + We omit this notation in PyG to allow for various data structures in a clean and understandable way. + +We show a simple example of an unweighted and undirected graph with three nodes and four edges. +Each node contains exactly one feature: + +.. code-block:: python + + import torch + from torch_geometric.data import Data + + edge_index = torch.tensor([[0, 1, 1, 2], + [1, 0, 2, 1]], dtype=torch.long) + x = torch.tensor([[-1], [0], [1]], dtype=torch.float) + + data = Data(x=x, edge_index=edge_index) + >>> Data(edge_index=[2, 4], x=[3, 1]) + +.. image:: ../_figures/graph.svg + :align: center + :width: 300px + +| + +Note that :obj:`edge_index`, *i.e.* the tensor defining the source and target nodes of all edges, is **not** a list of index tuples. 
+If you want to write your indices this way, you should transpose it and call :obj:`contiguous` on it before passing it to the data constructor:
+
+.. code-block:: python
+
+    import torch
+    from torch_geometric.data import Data
+
+    edge_index = torch.tensor([[0, 1],
+                               [1, 0],
+                               [1, 2],
+                               [2, 1]], dtype=torch.long)
+    x = torch.tensor([[-1], [0], [1]], dtype=torch.float)
+
+    data = Data(x=x, edge_index=edge_index.t().contiguous())
+    >>> Data(edge_index=[2, 4], x=[3, 1])
+
+Although the graph has only two edges, we need to define four index tuples to account for both directions of an edge.
+
+.. Note::
+    You can print out your data object at any time and get a concise summary of its attributes and their shapes.
+
+Note that the elements in :obj:`edge_index` must only hold indices in the range :obj:`{ 0, ..., num_nodes - 1}`.
+This is needed as we want our final data representation to be as compact as possible, *e.g.*, we want to index the source and destination node features of the first edge :obj:`(0, 1)` via :obj:`x[0]` and :obj:`x[1]`, respectively.
+You can always check that your final :class:`~torch_geometric.data.Data` objects fulfill these requirements by running :meth:`~torch_geometric.data.Data.validate`:
+
+.. code-block:: python
+
+    data.validate(raise_on_error=True)
+
+Besides holding a number of node-level, edge-level or graph-level attributes, :class:`~torch_geometric.data.Data` provides a number of useful utility functions, *e.g.*:
+
+.. code-block:: python
+
+    print(data.keys)
+    >>> ['x', 'edge_index']
+
+    print(data['x'])
+    >>> tensor([[-1.0],
+                [0.0],
+                [1.0]])
+
+    for key, item in data:
+        print(f'{key} found in data')
+    >>> x found in data
+    >>> edge_index found in data
+
+    'edge_attr' in data
+    >>> False
+
+    data.num_nodes
+    >>> 3
+
+    data.num_edges
+    >>> 4
+
+    data.num_node_features
+    >>> 1
+
+    data.has_isolated_nodes()
+    >>> False
+
+    data.has_self_loops()
+    >>> False
+
+    data.is_directed()
+    >>> False
+
+    # Transfer data object to GPU.
+    device = torch.device('cuda')
+    data = data.to(device)
+
+You can find a complete list of all methods at :class:`torch_geometric.data.Data`.
+
+Common Benchmark Datasets
+-------------------------
+
+PyG contains a large number of common benchmark datasets, *e.g.*, all Planetoid datasets (Cora, Citeseer, Pubmed), all graph classification datasets from `http://graphkernels.cs.tu-dortmund.de `_ and their `cleaned versions `_, the QM7 and QM9 datasets, and a handful of 3D mesh/point cloud datasets like FAUST, ModelNet10/40 and ShapeNet.
+
+Initializing a dataset is straightforward: it automatically downloads the raw files and processes them into the previously described :class:`~torch_geometric.data.Data` format.
+*E.g.*, to load the ENZYMES dataset (consisting of 600 graphs within 6 classes), type:
+
+.. code-block:: python
+
+    from torch_geometric.datasets import TUDataset
+
+    dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
+    >>> ENZYMES(600)
+
+    len(dataset)
+    >>> 600
+
+    dataset.num_classes
+    >>> 6
+
+    dataset.num_node_features
+    >>> 3
+
+We now have access to all 600 graphs in the dataset:
+
+.. code-block:: python
+
+    data = dataset[0]
+    >>> Data(edge_index=[2, 168], x=[37, 3], y=[1])
+
+    data.is_undirected()
+    >>> True
+
+We can see that the first graph in the dataset contains 37 nodes, each one having 3 features.
+There are 168/2 = 84 undirected edges and the graph is assigned to exactly one class.
+In addition, the data object holds exactly one graph-level target.
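+
+For instance, one simple way to get an overview of the whole dataset is to loop over it and collect some basic statistics.
+The following snippet is only an illustrative sketch and relies solely on the :obj:`num_nodes` and :obj:`num_edges` attributes introduced above:
+
+.. code-block:: python
+
+    # Gather the number of nodes and edges of every graph in the dataset:
+    num_nodes = [dataset[i].num_nodes for i in range(len(dataset))]
+    num_edges = [dataset[i].num_edges for i in range(len(dataset))]
+
+    print(sum(num_nodes) / len(dataset))  # Average number of nodes per graph.
+    print(sum(num_edges) / len(dataset))  # Average number of edges per graph.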
+
+We can even use slices, long or bool tensors to split the dataset.
+*E.g.*, to create a 90/10 train/test split, type:
+
+.. code-block:: python
+
+    train_dataset = dataset[:540]
+    >>> ENZYMES(540)
+
+    test_dataset = dataset[540:]
+    >>> ENZYMES(60)
+
+If you are unsure whether the dataset is already shuffled before you split, you can randomly permute it by running:
+
+.. code-block:: python
+
+    dataset = dataset.shuffle()
+    >>> ENZYMES(600)
+
+This is equivalent to doing:
+
+.. code-block:: python
+
+    perm = torch.randperm(len(dataset))
+    dataset = dataset[perm]
+    >>> ENZYMES(600)
+
+Let's try another one! Let's download Cora, the standard benchmark dataset for semi-supervised graph node classification:
+
+.. code-block:: python
+
+    from torch_geometric.datasets import Planetoid
+
+    dataset = Planetoid(root='/tmp/Cora', name='Cora')
+    >>> Cora()
+
+    len(dataset)
+    >>> 1
+
+    dataset.num_classes
+    >>> 7
+
+    dataset.num_node_features
+    >>> 1433
+
+Here, the dataset contains only a single, undirected citation graph:
+
+.. code-block:: python
+
+    data = dataset[0]
+    >>> Data(edge_index=[2, 10556], test_mask=[2708],
+             train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])
+
+    data.is_undirected()
+    >>> True
+
+    data.train_mask.sum().item()
+    >>> 140
+
+    data.val_mask.sum().item()
+    >>> 500
+
+    data.test_mask.sum().item()
+    >>> 1000
+
+This time, the :class:`~torch_geometric.data.Data` object holds a label for each node, and additional node-level attributes: :obj:`train_mask`, :obj:`val_mask` and :obj:`test_mask`, where
+
+- :obj:`train_mask` denotes against which nodes to train (140 nodes),
+- :obj:`val_mask` denotes which nodes to use for validation, *e.g.*, to perform early stopping (500 nodes),
+- :obj:`test_mask` denotes against which nodes to test (1000 nodes).
+
+Mini-batches
+------------
+
+Neural networks are usually trained in a batch-wise fashion.
+PyG achieves parallelization over a mini-batch by creating sparse block diagonal adjacency matrices (defined by :obj:`edge_index`) and concatenating feature and target matrices in the node dimension.
+This composition allows for differing numbers of nodes and edges across examples in one batch:
+
+.. math::
+
+    \mathbf{A} = \begin{bmatrix} \mathbf{A}_1 & & \\ & \ddots & \\ & & \mathbf{A}_n \end{bmatrix}, \qquad \mathbf{X} = \begin{bmatrix} \mathbf{X}_1 \\ \vdots \\ \mathbf{X}_n \end{bmatrix}, \qquad \mathbf{Y} = \begin{bmatrix} \mathbf{Y}_1 \\ \vdots \\ \mathbf{Y}_n \end{bmatrix}
+
+PyG contains its own :class:`torch_geometric.loader.DataLoader`, which already takes care of this concatenation process.
+Let's learn about it in an example:
+
+.. code-block:: python
+
+    from torch_geometric.datasets import TUDataset
+    from torch_geometric.loader import DataLoader
+
+    dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
+    loader = DataLoader(dataset, batch_size=32, shuffle=True)
+
+    for batch in loader:
+        batch
+        >>> DataBatch(batch=[1082], edge_index=[2, 4066], x=[1082, 21], y=[32])
+
+        batch.num_graphs
+        >>> 32
+
+:class:`torch_geometric.data.Batch` inherits from :class:`torch_geometric.data.Data` and contains an additional attribute called :obj:`batch`.
+
+:obj:`batch` is a column vector which maps each node to its respective graph in the batch:
+
+.. math::
+
+    \mathrm{batch} = {\begin{bmatrix} 0 & \cdots & 0 & 1 & \cdots & n - 2 & n - 1 & \cdots & n - 1 \end{bmatrix}}^{\top}
+
+You can use it to, *e.g.*, average node features in the node dimension for each graph individually:
+
+.. 
code-block:: python + + from torch_geometric.utils import scatter + from torch_geometric.datasets import TUDataset + from torch_geometric.loader import DataLoader + + dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True) + loader = DataLoader(dataset, batch_size=32, shuffle=True) + + for data in loader: + data + >>> DataBatch(batch=[1082], edge_index=[2, 4066], x=[1082, 21], y=[32]) + + data.num_graphs + >>> 32 + + x = scatter(data.x, data.batch, dim=0, reduce='mean') + x.size() + >>> torch.Size([32, 21]) + +You can learn more about the internal batching procedure of PyG, *e.g.*, how to modify its behaviour, `here `_. +For documentation of scatter operations, we refer the interested reader to the :obj:`torch-scatter` `documentation `_. + +Data Transforms +--------------- + +Transforms are a common way in :obj:`torchvision` to transform images and perform augmentation. +PyG comes with its own transforms, which expect a :class:`~torch_geometric.data.Data` object as input and return a new transformed :class:`~torch_geometric.data.Data` object. +Transforms can be chained together using :class:`torch_geometric.transforms.Compose` and are applied before saving a processed dataset on disk (:obj:`pre_transform`) or before accessing a graph in a dataset (:obj:`transform`). + +Let's look at an example, where we apply transforms on the ShapeNet dataset (containing 17,000 3D shape point clouds and per point labels from 16 shape categories). + +.. code-block:: python + + from torch_geometric.datasets import ShapeNet + + dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane']) + + dataset[0] + >>> Data(pos=[2518, 3], y=[2518]) + +We can convert the point cloud dataset into a graph dataset by generating nearest neighbor graphs from the point clouds via transforms: + +.. code-block:: python + + import torch_geometric.transforms as T + from torch_geometric.datasets import ShapeNet + + dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'], + pre_transform=T.KNNGraph(k=6)) + + dataset[0] + >>> Data(edge_index=[2, 15108], pos=[2518, 3], y=[2518]) + +.. note:: + We use the :obj:`pre_transform` to convert the data before saving it to disk (leading to faster loading times). + Note that the next time the dataset is initialized it will already contain graph edges, even if you do not pass any transform. + If the :obj:`pre_transform` does not match with the one from the already processed dataset, you will be given a warning. + +In addition, we can use the :obj:`transform` argument to randomly augment a :class:`~torch_geometric.data.Data` object, *e.g.*, translating each node position by a small number: + +.. code-block:: python + + import torch_geometric.transforms as T + from torch_geometric.datasets import ShapeNet + + dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'], + pre_transform=T.KNNGraph(k=6), + transform=T.RandomJitter(0.01)) + + dataset[0] + >>> Data(edge_index=[2, 15108], pos=[2518, 3], y=[2518]) + +You can find a complete list of all implemented transforms at :mod:`torch_geometric.transforms`. + +Learning Methods on Graphs +-------------------------- + +After learning about data handling, datasets, loader and transforms in PyG, it's time to implement our first graph neural network! + +We will use a simple GCN layer and replicate the experiments on the Cora citation dataset. +For a high-level explanation on GCN, have a look at its `blog post `_. + +We first need to load the Cora dataset: + +.. 
code-block:: python + + from torch_geometric.datasets import Planetoid + + dataset = Planetoid(root='/tmp/Cora', name='Cora') + >>> Cora() + +Note that we do not need to use transforms or a dataloader. +Now let's implement a two-layer GCN: + +.. code-block:: python + + import torch + import torch.nn.functional as F + from torch_geometric.nn import GCNConv + + class GCN(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv1 = GCNConv(dataset.num_node_features, 16) + self.conv2 = GCNConv(16, dataset.num_classes) + + def forward(self, data): + x, edge_index = data.x, data.edge_index + + x = self.conv1(x, edge_index) + x = F.relu(x) + x = F.dropout(x, training=self.training) + x = self.conv2(x, edge_index) + + return F.log_softmax(x, dim=1) + +The constructor defines two :class:`~torch_geometric.nn.conv.GCNConv` layers which get called in the forward pass of our network. +Note that the non-linearity is not integrated in the :obj:`conv` calls and hence needs to be applied afterwards (something which is consistent accross all operators in PyG). +Here, we chose to use ReLU as our intermediate non-linearity and finally output a softmax distribution over the number of classes. +Let's train this model on the training nodes for 200 epochs: + +.. code-block:: python + + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + model = GCN().to(device) + data = dataset[0].to(device) + optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4) + + model.train() + for epoch in range(200): + optimizer.zero_grad() + out = model(data) + loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask]) + loss.backward() + optimizer.step() + +Finally, we can evaluate our model on the test nodes: + +.. code-block:: python + + model.eval() + pred = model(data).argmax(dim=1) + correct = (pred[data.test_mask] == data.y[data.test_mask]).sum() + acc = int(correct) / int(data.test_mask.sum()) + print(f'Accuracy: {acc:.4f}') + >>> Accuracy: 0.8150 + +This is all it takes to implement your first graph neural network. +The easiest way to learn more about Graph Neural Networks is to study the examples in the :obj:`examples/` directory and to browse :mod:`torch_geometric.nn`. +Happy hacking! + +Exercises +--------- + +1. What does :obj:`edge_index.t().contiguous()` do? + +2. Load the :obj:`"IMDB-BINARY"` dataset from the :class:`~torch_geometric.datasets.TUDataset` benchmark suite and randomly split it into 80%/10%/10% training, validation and test graphs. + +3. What does each number of the following output mean? + + .. code-block:: python + + print(batch) + >>> DataBatch(batch=[1082], edge_index=[2, 4066], x=[1082, 21], y=[32]) diff --git a/docs/source/index.rst b/docs/source/index.rst index 75fc2312b7e9..53727d091a80 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -11,29 +11,39 @@ In addition, it consists of easy-to-use mini-batch loaders for operating on many .. slack:: .. toctree:: - :glob: :maxdepth: 1 - :caption: Notes - - notes/installation - notes/introduction - notes/create_gnn - notes/create_dataset - notes/heterogeneous - notes/load_csv - notes/graphgym - notes/batching - notes/sparse_tensor - notes/jit - notes/remote - notes/explain - notes/cheatsheet - notes/data_cheatsheet - notes/colabs - notes/resources + :caption: Install PyG + + install/installation + +.. toctree:: + :maxdepth: 1 + :caption: Get Started + + get_started/introduction + get_started/colabs + +.. 
toctree:: + :maxdepth: 1 + :caption: Tutorials + + tutorial/create_gnn + tutorial/create_dataset + tutorial/heterogeneous + tutorial/load_csv + tutorial/explain + +.. toctree:: + :maxdepth: 1 + :caption: Advanced Concepts + + advanced/batching + advanced/sparse_tensor + advanced/jit + advanced/remote + advanced/graphgym .. toctree:: - :glob: :maxdepth: 1 :caption: Package Reference @@ -48,3 +58,16 @@ In addition, it consists of easy-to-use mini-batch loaders for operating on many modules/graphgym modules/explain modules/profile + +.. toctree:: + :maxdepth: 1 + :caption: Cheatsheets + + cheatsheet/cheatsheet + cheatsheet/data_cheatsheet + +.. toctree:: + :maxdepth: 1 + :caption: External Resources + + external/resources diff --git a/docs/source/install/installation.rst b/docs/source/install/installation.rst new file mode 100644 index 000000000000..9c694a2e9f7a --- /dev/null +++ b/docs/source/install/installation.rst @@ -0,0 +1,185 @@ +Installation +============ + +PyG is available for Python 3.7 to Python 3.10. + +.. note:: + We do not recommend installation as a root user on your system Python. + Please setup a virtual environment, *e.g.*, via `Anaconda or Miniconda `_, or create a `Docker image `_. + +Quick Start +----------- + +.. raw:: html + :file: quick-start.html + +Installation via Anaconda +------------------------- + +You can now install PyG via `Anaconda `_ for all major OS/PyTorch/CUDA combinations 🤗 +If you have not yet installed PyTorch, install it via :obj:`conda` as described in the `official PyTorch documentation `_. +Given that you have PyTorch installed (:obj:`>=1.8.0`), simply run + +.. code-block:: none + + conda install pyg -c pyg + +.. warning:: + Conda packages are currently not available for M1/M2/M3 macs. + Please install the extension packages :ref:`from source`. + +Installation via Pip Wheels +--------------------------- + +We have outsourced a lot of functionality of PyG to other packages, which needs to be installed in advance. +These packages come with their own CPU and GPU kernel implementations based on the `PyTorch C++/CUDA extension interface `_. +We provide pip wheels for these packages for all major OS/PyTorch/CUDA combinations, see `here `__: + +.. warning:: + Wheels are currently not available for M1/M2/M3 macs. + Please install the extension packages :ref:`from source`. + +#. Ensure that at least PyTorch 1.12.0 is installed: + + .. code-block:: none + + python -c "import torch; print(torch.__version__)" + >>> 1.13.0 + +#. Find the CUDA version PyTorch was installed with: + + .. code-block:: none + + python -c "import torch; print(torch.version.cuda)" + >>> 11.6 + +#. Install the relevant packages: + + .. code-block:: none + + pip install pyg-lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html + pip install torch-geometric + + where :obj:`${CUDA}` and :obj:`${TORCH}` should be replaced by the specific CUDA version (*e.g.*, :obj:`cpu`, :obj:`cu116`, or :obj:`cu117` for PyTorch 1.13, and :obj:`cpu`, :obj:`cu102`, :obj:`cu113`, or :obj:`116` for PyTorch 1.12) and PyTorch version (:obj:`1.11.0`, :obj:`1.12.0`), respectively. + For example, for PyTorch 1.13.* and CUDA 11.6, type: + + .. code-block:: none + + pip install pyg-lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-1.13.0+cu116.html + pip install torch-geometric + + For PyTorch 1.12.* and CUDA 11.3, type: + + .. 
code-block:: none + + pip install pyg-lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-1.12.0+cu113.html + pip install torch-geometric + +#. Install additional packages *(optional)*: + + To add additional functionality to PyG, such as k-NN and radius graph generation or :class:`~torch_geometric.nn.conv.SplineConv` support, run + + .. code-block:: none + + pip install torch-cluster torch-spline-conv -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html + + following the same procedure as mentioned above. + +**Note:** Binaries of older versions are also provided for PyTorch 1.4.0, PyTorch 1.5.0, PyTorch 1.6.0, PyTorch 1.7.0/1.7.1, PyTorch 1.8.0/1.8.1, PyTorch 1.9.0, PyTorch 1.10.0/1.10.1/1.10.2,a nd PyTorch 1.11.0 (following the same procedure). +**For older versions, you need to explicitly specify the latest supported version number** or install via :obj:`pip install --no-index` in order to prevent a manual installation from source. +You can look up the latest supported version number `here `__. + +.. _installation_from_source: + +Installation from Source +------------------------ + +In case a specific version is not supported by `our wheels `_, you can alternatively install PyG from source: + +#. Ensure that your CUDA is setup correctly (optional): + + #. Check if PyTorch is installed with CUDA support: + + .. code-block:: none + + python -c "import torch; print(torch.cuda.is_available())" + >>> True + + #. Add CUDA to :obj:`$PATH` and :obj:`$CPATH` (note that your actual CUDA path may vary from :obj:`/usr/local/cuda`): + + .. code-block:: none + + export PATH=/usr/local/cuda/bin:$PATH + echo $PATH + >>> /usr/local/cuda/bin:... + + export CPATH=/usr/local/cuda/include:$CPATH + echo $CPATH + >>> /usr/local/cuda/include:... + + #. Add CUDA to :obj:`$LD_LIBRARY_PATH` on Linux and to :obj:`$DYLD_LIBRARY_PATH` on macOS (note that your actual CUDA path may vary from :obj:`/usr/local/cuda`): + + .. code-block:: none + + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH + echo $LD_LIBRARY_PATH + >>> /usr/local/cuda/lib64:... + + export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH + echo $DYLD_LIBRARY_PATH + >>> /usr/local/cuda/lib:... + + #. Verify that :obj:`nvcc` is accessible from terminal: + + .. code-block:: none + + nvcc --version + >>> 11.3 + + #. Ensure that PyTorch and system CUDA versions match: + + .. code-block:: none + + python -c "import torch; print(torch.version.cuda)" + >>> 11.3 + + nvcc --version + >>> 11.3 + +#. Install the relevant packages: + + .. code-block:: none + + pip install git+https://github.com/pyg-team/pyg-lib.git + pip install torch-scatter + pip install torch-sparse + pip install torch-geometric + +#. Install additional packages *(optional)*: + + .. code-block:: none + + pip install torch-cluster + pip install torch-spline-conv + +In rare cases, CUDA or Python path problems can prevent a successful installation. +:obj:`pip` may even signal a successful installation, but execution simply crashes with :obj:`Segmentation fault (core dumped)`. +We collected common installation errors in the `Frequently Asked Questions `_ subsection. +In case the FAQ does not help you in solving your problem, please create an `issue `_. +Before, please verify that your CUDA is set up correctly by following the official `installation guide `_. + +Frequently Asked Questions +-------------------------- + +#. 
:obj:`undefined symbol: **make_function_schema**`: This issue signals (1) a **version conflict** between your installed PyTorch version and the :obj:`${TORCH}` version specified to install the extension packages, or (2) a version conflict between the installed CUDA version of PyTorch and the :obj:`${CUDA}` version specified to install the extension packages. + Please verify that your PyTorch version and its CUDA version **match** with your installation command: + + .. code-block:: none + + python -c "import torch; print(torch.__version__)" + python -c "import torch; print(torch.version.cuda)" + nvcc --version + + For re-installation, ensure that you do not run into any caching issues by using the :obj:`pip --force-reinstall --no-cache-dir` flags. + In addition, the :obj:`pip --verbose` option may help to track down any issues during installation. + If you still do not find any success in installation, please try to install the extension packages :ref:`from source`. diff --git a/docs/source/notes/quick-start.html b/docs/source/install/quick-start.html similarity index 100% rename from docs/source/notes/quick-start.html rename to docs/source/install/quick-start.html diff --git a/docs/source/notes/installation.rst b/docs/source/notes/installation.rst index 9c694a2e9f7a..f7fc983247a9 100644 --- a/docs/source/notes/installation.rst +++ b/docs/source/notes/installation.rst @@ -1,185 +1 @@ -Installation -============ - -PyG is available for Python 3.7 to Python 3.10. - -.. note:: - We do not recommend installation as a root user on your system Python. - Please setup a virtual environment, *e.g.*, via `Anaconda or Miniconda `_, or create a `Docker image `_. - -Quick Start ------------ - -.. raw:: html - :file: quick-start.html - -Installation via Anaconda -------------------------- - -You can now install PyG via `Anaconda `_ for all major OS/PyTorch/CUDA combinations 🤗 -If you have not yet installed PyTorch, install it via :obj:`conda` as described in the `official PyTorch documentation `_. -Given that you have PyTorch installed (:obj:`>=1.8.0`), simply run - -.. code-block:: none - - conda install pyg -c pyg - -.. warning:: - Conda packages are currently not available for M1/M2/M3 macs. - Please install the extension packages :ref:`from source`. - -Installation via Pip Wheels ---------------------------- - -We have outsourced a lot of functionality of PyG to other packages, which needs to be installed in advance. -These packages come with their own CPU and GPU kernel implementations based on the `PyTorch C++/CUDA extension interface `_. -We provide pip wheels for these packages for all major OS/PyTorch/CUDA combinations, see `here `__: - -.. warning:: - Wheels are currently not available for M1/M2/M3 macs. - Please install the extension packages :ref:`from source`. - -#. Ensure that at least PyTorch 1.12.0 is installed: - - .. code-block:: none - - python -c "import torch; print(torch.__version__)" - >>> 1.13.0 - -#. Find the CUDA version PyTorch was installed with: - - .. code-block:: none - - python -c "import torch; print(torch.version.cuda)" - >>> 11.6 - -#. Install the relevant packages: - - .. 
code-block:: none - - pip install pyg-lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html - pip install torch-geometric - - where :obj:`${CUDA}` and :obj:`${TORCH}` should be replaced by the specific CUDA version (*e.g.*, :obj:`cpu`, :obj:`cu116`, or :obj:`cu117` for PyTorch 1.13, and :obj:`cpu`, :obj:`cu102`, :obj:`cu113`, or :obj:`116` for PyTorch 1.12) and PyTorch version (:obj:`1.11.0`, :obj:`1.12.0`), respectively. - For example, for PyTorch 1.13.* and CUDA 11.6, type: - - .. code-block:: none - - pip install pyg-lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-1.13.0+cu116.html - pip install torch-geometric - - For PyTorch 1.12.* and CUDA 11.3, type: - - .. code-block:: none - - pip install pyg-lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-1.12.0+cu113.html - pip install torch-geometric - -#. Install additional packages *(optional)*: - - To add additional functionality to PyG, such as k-NN and radius graph generation or :class:`~torch_geometric.nn.conv.SplineConv` support, run - - .. code-block:: none - - pip install torch-cluster torch-spline-conv -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html - - following the same procedure as mentioned above. - -**Note:** Binaries of older versions are also provided for PyTorch 1.4.0, PyTorch 1.5.0, PyTorch 1.6.0, PyTorch 1.7.0/1.7.1, PyTorch 1.8.0/1.8.1, PyTorch 1.9.0, PyTorch 1.10.0/1.10.1/1.10.2,a nd PyTorch 1.11.0 (following the same procedure). -**For older versions, you need to explicitly specify the latest supported version number** or install via :obj:`pip install --no-index` in order to prevent a manual installation from source. -You can look up the latest supported version number `here `__. - -.. _installation_from_source: - -Installation from Source ------------------------- - -In case a specific version is not supported by `our wheels `_, you can alternatively install PyG from source: - -#. Ensure that your CUDA is setup correctly (optional): - - #. Check if PyTorch is installed with CUDA support: - - .. code-block:: none - - python -c "import torch; print(torch.cuda.is_available())" - >>> True - - #. Add CUDA to :obj:`$PATH` and :obj:`$CPATH` (note that your actual CUDA path may vary from :obj:`/usr/local/cuda`): - - .. code-block:: none - - export PATH=/usr/local/cuda/bin:$PATH - echo $PATH - >>> /usr/local/cuda/bin:... - - export CPATH=/usr/local/cuda/include:$CPATH - echo $CPATH - >>> /usr/local/cuda/include:... - - #. Add CUDA to :obj:`$LD_LIBRARY_PATH` on Linux and to :obj:`$DYLD_LIBRARY_PATH` on macOS (note that your actual CUDA path may vary from :obj:`/usr/local/cuda`): - - .. code-block:: none - - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH - echo $LD_LIBRARY_PATH - >>> /usr/local/cuda/lib64:... - - export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH - echo $DYLD_LIBRARY_PATH - >>> /usr/local/cuda/lib:... - - #. Verify that :obj:`nvcc` is accessible from terminal: - - .. code-block:: none - - nvcc --version - >>> 11.3 - - #. Ensure that PyTorch and system CUDA versions match: - - .. code-block:: none - - python -c "import torch; print(torch.version.cuda)" - >>> 11.3 - - nvcc --version - >>> 11.3 - -#. Install the relevant packages: - - .. code-block:: none - - pip install git+https://github.com/pyg-team/pyg-lib.git - pip install torch-scatter - pip install torch-sparse - pip install torch-geometric - -#. Install additional packages *(optional)*: - - .. 
code-block:: none - - pip install torch-cluster - pip install torch-spline-conv - -In rare cases, CUDA or Python path problems can prevent a successful installation. -:obj:`pip` may even signal a successful installation, but execution simply crashes with :obj:`Segmentation fault (core dumped)`. -We collected common installation errors in the `Frequently Asked Questions `_ subsection. -In case the FAQ does not help you in solving your problem, please create an `issue `_. -Before, please verify that your CUDA is set up correctly by following the official `installation guide `_. - -Frequently Asked Questions --------------------------- - -#. :obj:`undefined symbol: **make_function_schema**`: This issue signals (1) a **version conflict** between your installed PyTorch version and the :obj:`${TORCH}` version specified to install the extension packages, or (2) a version conflict between the installed CUDA version of PyTorch and the :obj:`${CUDA}` version specified to install the extension packages. - Please verify that your PyTorch version and its CUDA version **match** with your installation command: - - .. code-block:: none - - python -c "import torch; print(torch.__version__)" - python -c "import torch; print(torch.version.cuda)" - nvcc --version - - For re-installation, ensure that you do not run into any caching issues by using the :obj:`pip --force-reinstall --no-cache-dir` flags. - In addition, the :obj:`pip --verbose` option may help to track down any issues during installation. - If you still do not find any success in installation, please try to install the extension packages :ref:`from source`. +.. include:: ../install/installation.rst diff --git a/docs/source/notes/introduction.rst b/docs/source/notes/introduction.rst index 4ef0c3a3a7cc..fa73a13fab7e 100644 --- a/docs/source/notes/introduction.rst +++ b/docs/source/notes/introduction.rst @@ -1,437 +1 @@ -Introduction by Example -======================= - -We shortly introduce the fundamental concepts of PyG through self-contained examples. -At its core, PyG provides the following main features: - -.. contents:: - :local: - -Data Handling of Graphs ------------------------ - -A graph is used to model pairwise relations (edges) between objects (nodes). -A single graph in PyG is described by an instance of :class:`torch_geometric.data.Data`, which holds the following attributes by default: - -- :obj:`data.x`: Node feature matrix with shape :obj:`[num_nodes, num_node_features]` -- :obj:`data.edge_index`: Graph connectivity in `COO format `_ with shape :obj:`[2, num_edges]` and type :obj:`torch.long` -- :obj:`data.edge_attr`: Edge feature matrix with shape :obj:`[num_edges, num_edge_features]` -- :obj:`data.y`: Target to train against (may have arbitrary shape), *e.g.*, node-level targets of shape :obj:`[num_nodes, *]` or graph-level targets of shape :obj:`[1, *]` -- :obj:`data.pos`: Node position matrix with shape :obj:`[num_nodes, num_dimensions]` - -None of these attributes are required. -In fact, the :class:`~torch_geometric.data.Data` object is not even restricted to these attributes. -We can, *e.g.*, extend it by :obj:`data.face` to save the connectivity of triangles from a 3D mesh in a tensor with shape :obj:`[3, num_faces]` and type :obj:`torch.long`. - -.. Note:: - PyTorch and :obj:`torchvision` define an example as a tuple of an image and a target. - We omit this notation in PyG to allow for various data structures in a clean and understandable way. 
- -We show a simple example of an unweighted and undirected graph with three nodes and four edges. -Each node contains exactly one feature: - -.. code-block:: python - - import torch - from torch_geometric.data import Data - - edge_index = torch.tensor([[0, 1, 1, 2], - [1, 0, 2, 1]], dtype=torch.long) - x = torch.tensor([[-1], [0], [1]], dtype=torch.float) - - data = Data(x=x, edge_index=edge_index) - >>> Data(edge_index=[2, 4], x=[3, 1]) - -.. image:: ../_figures/graph.svg - :align: center - :width: 300px - -| - -Note that :obj:`edge_index`, *i.e.* the tensor defining the source and target nodes of all edges, is **not** a list of index tuples. -If you want to write your indices this way, you should transpose and call :obj:`contiguous` on it before passing them to the data constructor: - -.. code-block:: python - - import torch - from torch_geometric.data import Data - - edge_index = torch.tensor([[0, 1], - [1, 0], - [1, 2], - [2, 1]], dtype=torch.long) - x = torch.tensor([[-1], [0], [1]], dtype=torch.float) - - data = Data(x=x, edge_index=edge_index.t().contiguous()) - >>> Data(edge_index=[2, 4], x=[3, 1]) - -Although the graph has only two edges, we need to define four index tuples to account for both directions of a edge. - -.. Note:: - You can print out your data object anytime and receive a short information about its attributes and their shapes. - -Note that it is necessary that the elements in :obj:`edge_index` only hold indices in the range :obj:`{ 0, ..., num_nodes - 1}`. -This is needed as we want our final data representation to be as compact as possible, *e.g.*, we want to index the source and destination node features of the first edge :obj:`(0, 1)` via :obj:`x[0]` and :obj:`x[1]`, respectively. -You can always check that your final :class:`~torch_geometric.data.Data` objects fulfill these requirements by running :meth:`~torch_geometric.data.Data.validate`: - -.. code-block:: python - - data.validate(raise_on_error=True) - -Besides holding a number of node-level, edge-level or graph-level attributes, :class:`~torch_geometric.data.Data` provides a number of useful utility functions, *e.g.*: - -.. code-block:: python - - print(data.keys) - >>> ['x', 'edge_index'] - - print(data['x']) - >>> tensor([[-1.0], - [0.0], - [1.0]]) - - for key, item in data: - print(f'{key} found in data') - >>> x found in data - >>> edge_index found in data - - 'edge_attr' in data - >>> False - - data.num_nodes - >>> 3 - - data.num_edges - >>> 4 - - data.num_node_features - >>> 1 - - data.has_isolated_nodes() - >>> False - - data.has_self_loops() - >>> False - - data.is_directed() - >>> False - - # Transfer data object to GPU. - device = torch.device('cuda') - data = data.to(device) - -You can find a complete list of all methods at :class:`torch_geometric.data.Data`. - -Common Benchmark Datasets -------------------------- - -PyG contains a large number of common benchmark datasets, *e.g.*, all Planetoid datasets (Cora, Citeseer, Pubmed), all graph classification datasets from `http://graphkernels.cs.tu-dortmund.de `_ and their `cleaned versions `_, the QM7 and QM9 dataset, and a handful of 3D mesh/point cloud datasets like FAUST, ModelNet10/40 and ShapeNet. - -Initializing a dataset is straightforward. -An initialization of a dataset will automatically download its raw files and process them to the previously described :class:`~torch_geometric.data.Data` format. -*E.g.*, to load the ENZYMES dataset (consisting of 600 graphs within 6 classes), type: - -.. 
code-block:: python - - from torch_geometric.datasets import TUDataset - - dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES') - >>> ENZYMES(600) - - len(dataset) - >>> 600 - - dataset.num_classes - >>> 6 - - dataset.num_node_features - >>> 3 - -We now have access to all 600 graphs in the dataset: - -.. code-block:: python - - data = dataset[0] - >>> Data(edge_index=[2, 168], x=[37, 3], y=[1]) - - data.is_undirected() - >>> True - -We can see that the first graph in the dataset contains 37 nodes, each one having 3 features. -There are 168/2 = 84 undirected edges and the graph is assigned to exactly one class. -In addition, the data object is holding exactly one graph-level target. - -We can even use slices, long or bool tensors to split the dataset. -*E.g.*, to create a 90/10 train/test split, type: - -.. code-block:: python - - train_dataset = dataset[:540] - >>> ENZYMES(540) - - test_dataset = dataset[540:] - >>> ENZYMES(60) - -If you are unsure whether the dataset is already shuffled before you split, you can randomly permutate it by running: - -.. code-block:: python - - dataset = dataset.shuffle() - >>> ENZYMES(600) - -This is equivalent of doing: - -.. code-block:: python - - perm = torch.randperm(len(dataset)) - dataset = dataset[perm] - >> ENZYMES(600) - -Let's try another one! Let's download Cora, the standard benchmark dataset for semi-supervised graph node classification: - -.. code-block:: python - - from torch_geometric.datasets import Planetoid - - dataset = Planetoid(root='/tmp/Cora', name='Cora') - >>> Cora() - - len(dataset) - >>> 1 - - dataset.num_classes - >>> 7 - - dataset.num_node_features - >>> 1433 - -Here, the dataset contains only a single, undirected citation graph: - -.. code-block:: python - - data = dataset[0] - >>> Data(edge_index=[2, 10556], test_mask=[2708], - train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708]) - - data.is_undirected() - >>> True - - data.train_mask.sum().item() - >>> 140 - - data.val_mask.sum().item() - >>> 500 - - data.test_mask.sum().item() - >>> 1000 - -This time, the :class:`~torch_geometric.data.Data` objects holds a label for each node, and additional node-level attributes: :obj:`train_mask`, :obj:`val_mask` and :obj:`test_mask`, where - -- :obj:`train_mask` denotes against which nodes to train (140 nodes), -- :obj:`val_mask` denotes which nodes to use for validation, *e.g.*, to perform early stopping (500 nodes), -- :obj:`test_mask` denotes against which nodes to test (1000 nodes). - -Mini-batches ------------- - -Neural networks are usually trained in a batch-wise fashion. -PyG achieves parallelization over a mini-batch by creating sparse block diagonal adjacency matrices (defined by :obj:`edge_index`) and concatenating feature and target matrices in the node dimension. -This composition allows differing number of nodes and edges over examples in one batch: - -.. math:: - - \mathbf{A} = \begin{bmatrix} \mathbf{A}_1 & & \\ & \ddots & \\ & & \mathbf{A}_n \end{bmatrix}, \qquad \mathbf{X} = \begin{bmatrix} \mathbf{X}_1 \\ \vdots \\ \mathbf{X}_n \end{bmatrix}, \qquad \mathbf{Y} = \begin{bmatrix} \mathbf{Y}_1 \\ \vdots \\ \mathbf{Y}_n \end{bmatrix} - -PyG contains its own :class:`torch_geometric.loader.DataLoader`, which already takes care of this concatenation process. -Let's learn about it in an example: - -.. 
code-block:: python - - from torch_geometric.datasets import TUDataset - from torch_geometric.loader import DataLoader - - dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True) - loader = DataLoader(dataset, batch_size=32, shuffle=True) - - for batch in loader: - batch - >>> DataBatch(batch=[1082], edge_index=[2, 4066], x=[1082, 21], y=[32]) - - batch.num_graphs - >>> 32 - -:class:`torch_geometric.data.Batch` inherits from :class:`torch_geometric.data.Data` and contains an additional attribute called :obj:`batch`. - -:obj:`batch` is a column vector which maps each node to its respective graph in the batch: - -.. math:: - - \mathrm{batch} = {\begin{bmatrix} 0 & \cdots & 0 & 1 & \cdots & n - 2 & n -1 & \cdots & n - 1 \end{bmatrix}}^{\top} - -You can use it to, *e.g.*, average node features in the node dimension for each graph individually: - -.. code-block:: python - - from torch_geometric.utils import scatter - from torch_geometric.datasets import TUDataset - from torch_geometric.loader import DataLoader - - dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True) - loader = DataLoader(dataset, batch_size=32, shuffle=True) - - for data in loader: - data - >>> DataBatch(batch=[1082], edge_index=[2, 4066], x=[1082, 21], y=[32]) - - data.num_graphs - >>> 32 - - x = scatter(data.x, data.batch, dim=0, reduce='mean') - x.size() - >>> torch.Size([32, 21]) - -You can learn more about the internal batching procedure of PyG, *e.g.*, how to modify its behaviour, `here `_. -For documentation of scatter operations, we refer the interested reader to the :obj:`torch-scatter` `documentation `_. - -Data Transforms ---------------- - -Transforms are a common way in :obj:`torchvision` to transform images and perform augmentation. -PyG comes with its own transforms, which expect a :class:`~torch_geometric.data.Data` object as input and return a new transformed :class:`~torch_geometric.data.Data` object. -Transforms can be chained together using :class:`torch_geometric.transforms.Compose` and are applied before saving a processed dataset on disk (:obj:`pre_transform`) or before accessing a graph in a dataset (:obj:`transform`). - -Let's look at an example, where we apply transforms on the ShapeNet dataset (containing 17,000 3D shape point clouds and per point labels from 16 shape categories). - -.. code-block:: python - - from torch_geometric.datasets import ShapeNet - - dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane']) - - dataset[0] - >>> Data(pos=[2518, 3], y=[2518]) - -We can convert the point cloud dataset into a graph dataset by generating nearest neighbor graphs from the point clouds via transforms: - -.. code-block:: python - - import torch_geometric.transforms as T - from torch_geometric.datasets import ShapeNet - - dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'], - pre_transform=T.KNNGraph(k=6)) - - dataset[0] - >>> Data(edge_index=[2, 15108], pos=[2518, 3], y=[2518]) - -.. note:: - We use the :obj:`pre_transform` to convert the data before saving it to disk (leading to faster loading times). - Note that the next time the dataset is initialized it will already contain graph edges, even if you do not pass any transform. - If the :obj:`pre_transform` does not match with the one from the already processed dataset, you will be given a warning. - -In addition, we can use the :obj:`transform` argument to randomly augment a :class:`~torch_geometric.data.Data` object, *e.g.*, translating each node position by a small number: - -.. 
code-block:: python - - import torch_geometric.transforms as T - from torch_geometric.datasets import ShapeNet - - dataset = ShapeNet(root='/tmp/ShapeNet', categories=['Airplane'], - pre_transform=T.KNNGraph(k=6), - transform=T.RandomJitter(0.01)) - - dataset[0] - >>> Data(edge_index=[2, 15108], pos=[2518, 3], y=[2518]) - -You can find a complete list of all implemented transforms at :mod:`torch_geometric.transforms`. - -Learning Methods on Graphs --------------------------- - -After learning about data handling, datasets, loader and transforms in PyG, it's time to implement our first graph neural network! - -We will use a simple GCN layer and replicate the experiments on the Cora citation dataset. -For a high-level explanation on GCN, have a look at its `blog post `_. - -We first need to load the Cora dataset: - -.. code-block:: python - - from torch_geometric.datasets import Planetoid - - dataset = Planetoid(root='/tmp/Cora', name='Cora') - >>> Cora() - -Note that we do not need to use transforms or a dataloader. -Now let's implement a two-layer GCN: - -.. code-block:: python - - import torch - import torch.nn.functional as F - from torch_geometric.nn import GCNConv - - class GCN(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv1 = GCNConv(dataset.num_node_features, 16) - self.conv2 = GCNConv(16, dataset.num_classes) - - def forward(self, data): - x, edge_index = data.x, data.edge_index - - x = self.conv1(x, edge_index) - x = F.relu(x) - x = F.dropout(x, training=self.training) - x = self.conv2(x, edge_index) - - return F.log_softmax(x, dim=1) - -The constructor defines two :class:`~torch_geometric.nn.conv.GCNConv` layers which get called in the forward pass of our network. -Note that the non-linearity is not integrated in the :obj:`conv` calls and hence needs to be applied afterwards (something which is consistent accross all operators in PyG). -Here, we chose to use ReLU as our intermediate non-linearity and finally output a softmax distribution over the number of classes. -Let's train this model on the training nodes for 200 epochs: - -.. code-block:: python - - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - model = GCN().to(device) - data = dataset[0].to(device) - optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4) - - model.train() - for epoch in range(200): - optimizer.zero_grad() - out = model(data) - loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask]) - loss.backward() - optimizer.step() - -Finally, we can evaluate our model on the test nodes: - -.. code-block:: python - - model.eval() - pred = model(data).argmax(dim=1) - correct = (pred[data.test_mask] == data.y[data.test_mask]).sum() - acc = int(correct) / int(data.test_mask.sum()) - print(f'Accuracy: {acc:.4f}') - >>> Accuracy: 0.8150 - -This is all it takes to implement your first graph neural network. -The easiest way to learn more about Graph Neural Networks is to study the examples in the :obj:`examples/` directory and to browse :mod:`torch_geometric.nn`. -Happy hacking! - -Exercises ---------- - -1. What does :obj:`edge_index.t().contiguous()` do? - -2. Load the :obj:`"IMDB-BINARY"` dataset from the :class:`~torch_geometric.datasets.TUDataset` benchmark suite and randomly split it into 80%/10%/10% training, validation and test graphs. - -3. What does each number of the following output mean? - - .. code-block:: python - - print(batch) - >>> DataBatch(batch=[1082], edge_index=[2, 4066], x=[1082, 21], y=[32]) +.. 
include:: ../get_started/introduction.rst diff --git a/docs/source/notes/create_dataset.rst b/docs/source/tutorial/create_dataset.rst similarity index 100% rename from docs/source/notes/create_dataset.rst rename to docs/source/tutorial/create_dataset.rst diff --git a/docs/source/notes/create_gnn.rst b/docs/source/tutorial/create_gnn.rst similarity index 100% rename from docs/source/notes/create_gnn.rst rename to docs/source/tutorial/create_gnn.rst diff --git a/docs/source/notes/explain.rst b/docs/source/tutorial/explain.rst similarity index 100% rename from docs/source/notes/explain.rst rename to docs/source/tutorial/explain.rst diff --git a/docs/source/notes/heterogeneous.rst b/docs/source/tutorial/heterogeneous.rst similarity index 100% rename from docs/source/notes/heterogeneous.rst rename to docs/source/tutorial/heterogeneous.rst diff --git a/docs/source/notes/load_csv.rst b/docs/source/tutorial/load_csv.rst similarity index 100% rename from docs/source/notes/load_csv.rst rename to docs/source/tutorial/load_csv.rst From d183e1ac7781da93d3245635cbd0604f999c49aa Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 14 Jan 2023 10:03:28 +0100 Subject: [PATCH 2/4] update --- docs/source/notes/installation.rst | 1 - docs/source/notes/introduction.rst | 1 - 2 files changed, 2 deletions(-) delete mode 100644 docs/source/notes/installation.rst delete mode 100644 docs/source/notes/introduction.rst diff --git a/docs/source/notes/installation.rst b/docs/source/notes/installation.rst deleted file mode 100644 index f7fc983247a9..000000000000 --- a/docs/source/notes/installation.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../install/installation.rst diff --git a/docs/source/notes/introduction.rst b/docs/source/notes/introduction.rst deleted file mode 100644 index fa73a13fab7e..000000000000 --- a/docs/source/notes/introduction.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../get_started/introduction.rst From e9c1d2da3d8acadc885675ad7a63c2d3902bab2a Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 14 Jan 2023 10:04:53 +0100 Subject: [PATCH 3/4] update --- CHANGELOG.md | 1 + docs/source/cheatsheet/{cheatsheet.rst => gnn_cheatsheet.rst} | 0 2 files changed, 1 insertion(+) rename docs/source/cheatsheet/{cheatsheet.rst => gnn_cheatsheet.rst} (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad94b83f5f4f..57f1266d952b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Added support for dropping nodes in `utils.to_dense_batch` in case `max_num_nodes` is smaller than the number of nodes ([#6124](https://github.com/pyg-team/pytorch_geometric/pull/6124)) - Added the RandLA-Net architecture as an example ([#5117](https://github.com/pyg-team/pytorch_geometric/pull/5117)) ### Changed +- Re-structured the documentation ([#6420](https://github.com/pyg-team/pytorch_geometric/pull/6420)) - Fix the default arguments of `DataParallel` class ([#6376](https://github.com/pyg-team/pytorch_geometric/pull/6376)) - Fix `ImbalancedSampler` on sliced `InMemoryDataset` ([#6374](https://github.com/pyg-team/pytorch_geometric/pull/6374)) - Breaking Change: Changed the interface and implementation of `GraphMultisetTransformer` ([#6343](https://github.com/pyg-team/pytorch_geometric/pull/6343)) diff --git a/docs/source/cheatsheet/cheatsheet.rst b/docs/source/cheatsheet/gnn_cheatsheet.rst similarity index 100% rename from docs/source/cheatsheet/cheatsheet.rst rename to docs/source/cheatsheet/gnn_cheatsheet.rst From 9b3df73c90201478c56f76b575627e713c34d6e6 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Sat, 14 Jan 2023 10:06:52 +0100 Subject: [PATCH 4/4] update --- docs/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 53727d091a80..976ec4807077 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -63,7 +63,7 @@ In addition, it consists of easy-to-use mini-batch loaders for operating on many :maxdepth: 1 :caption: Cheatsheets - cheatsheet/cheatsheet + cheatsheet/gnn_cheatsheet cheatsheet/data_cheatsheet .. toctree::