TopoTune #88

Merged: 23 commits, Oct 9, 2024
16 changes: 15 additions & 1 deletion README.md
@@ -43,7 +43,7 @@ Assess how your model compares against state-of-the-art topological neural networks
<img src="resources/workflow.jpg" width="700">
</p>

The main pipeline trains and evaluates a wide range of state-of-the-art TNNs and Graph Neural Networks (GNNs) (see <a href="#gear-neural-networks">:gear: Neural Networks</a>) on numerous and varied datasets and benchmark tasks (see <a href="#books-datasets">:books: Datasets</a> ).
The main pipeline trains and evaluates a wide range of state-of-the-art TNNs and Graph Neural Networks (GNNs) (see <a href="#gear-neural-networks">:gear: Neural Networks</a>) on numerous and varied datasets and benchmark tasks (see <a href="#books-datasets">:books: Datasets</a> ). Through TopoTune (see <a href="#bulb-topotune">:bulb: TopoTune</a>), the library provides easy access to training and testing an entire landscape of graph-based TNNs, new or existing, on any topological domain.
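Which dataset and model the pipeline trains is selected in the run configuration; a sketch, abridged from this PR's edit to `configs/run.yaml` (Hydra-style defaults list; comments are our reading of the structure):

```yaml
# configs/run.yaml (abridged): the defaults list selects which dataset and
# model configs are composed into the run.
defaults:
  - _self_
  - dataset: graph/PROTEINS              # any config under configs/dataset/
  - model: simplicial/topotune_onehasse  # any config under configs/model/
  - transforms: ${get_default_transform:${dataset},${model}}
```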

Additionally, the library offers the ability to transform, i.e. _lift_, each dataset from one topological domain to another (see <a href="#rocket-liftings">:rocket: Liftings</a>), enabling for the first time an exhaustive inter-domain comparison of TNNs.

@@ -142,6 +142,20 @@ We list the neural networks trained and evaluated by `TopoBenchmarkX`, organized
| UniGNN | [UniGNN: a Unified Framework for Graph and Hypergraph Neural Networks](https://arxiv.org/pdf/2105.00956) |
| UniGNN2 | [UniGNN: a Unified Framework for Graph and Hypergraph Neural Networks](https://arxiv.org/pdf/2105.00956) |

### Combinatorial complexes
| Model | Reference |
| --- | --- |
| GCCN | Generalized Combinatorial Complex Neural Networks |

## :bulb: TopoTune

We include TopoTune, a comprehensive framework for easily defining and training new, general TDL models (GCCNs, pictured below) on any domain, using any (graph) neural network ω as a backbone, as well as for reproducing existing models. To train and test a GCCN, it suffices to specify the choice of domain, neighborhood structure, and backbone model in the configuration. We provide scripts to reproduce a broad class of GCCNs in `scripts/topotune`, and to reproduce iterations of existing neural networks, as previously reported, in `scripts/topotune/existing_models`.
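As a sketch of those three choices, abridged from the `configs/model/cell/topotune.yaml` added in this PR: `model_domain` fixes the domain, `routes` the neighborhood structure, and `tune_gnn` the backbone ω. The comments are our reading of the parameter names, not documented semantics.

```yaml
model_domain: cell      # topological domain (cell complex)
tune_gnn: IdentityGIN   # backbone GNN omega, resolved as torch_geometric.nn.models.${model.tune_gnn}

backbone:
  _target_: topobenchmarkx.nn.backbones.combinatorial.gccn.TopoTune
  routes:               # neighborhoods as [source rank, target rank] -> relation
    - - [1, 1]
      - adjacency       # edge-to-edge adjacency
    - - [0, 1]
      - cbdry           # node-to-edge coboundary
    - - [2, 1]
      - bdry            # face-to-edge boundary
  layers: 2
```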

<p align="center">
<img src="resources/gccn.jpg" width="700">
</p>


## :rocket: Liftings

We list the liftings used in `TopoBenchmarkX` to transform datasets. Here, a _lifting_ refers to a function that transforms a dataset defined on a topological domain (_e.g._, on a graph) into the same dataset but supported on a different topological domain (_e.g._, on a simplicial complex).
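A lifting is configured like any other transform; for instance, abridged from `configs/transforms/liftings/graph2cell/cycle.yaml` as edited in this PR (the comments are our reading of the parameter names):

```yaml
_target_: topobenchmarkx.transforms.data_transform.DataTransform
transform_type: 'lifting'
transform_name: "CellCycleLifting"  # graph -> cell complex, promoting cycles to 2-cells
max_cell_length: 18                 # upper bound on the cycle length kept as a 2-cell
```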
2 changes: 1 addition & 1 deletion configs/dataset/graph/MUTAG.yaml
@@ -32,6 +32,6 @@ split_params:

# Dataloader parameters
dataloader_params:
batch_size: 10 # We have an issue with allowing multiple graphs in a batch due to sparse incidences
batch_size: 10
num_workers: 0
pin_memory: False
2 changes: 1 addition & 1 deletion configs/logger/wandb.yaml
@@ -7,7 +7,7 @@ wandb:
offline: False
id: null # pass correct id to resume experiment!
anonymous: null # enable anonymous logging
project: "None"
project: "TopoTune_tests"
log_model: False # upload lightning ckpts
prefix: "" # a string to put at the beginning of metric keys
# entity: "" # set to name of your wandb team
59 changes: 59 additions & 0 deletions configs/model/cell/topotune.yaml
@@ -0,0 +1,59 @@
_target_: topobenchmarkx.model.TBXModel

model_name: topotune
model_domain: cell
tune_gnn: IdentityGIN

feature_encoder:
_target_: topobenchmarkx.nn.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset},${oc.select:transforms,null}}
out_channels: 32
proj_dropout: 0.
selected_dimensions:
- 0
- 1
- 2

backbone:
_target_: topobenchmarkx.nn.backbones.combinatorial.gccn.TopoTune
GNN:
_target_: torch_geometric.nn.models.${model.tune_gnn}
# _target_: topobenchmarkx.nn.backbones.graph.${model.tune_gnn}
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${model.feature_encoder.out_channels}
hidden_channels: ${model.feature_encoder.out_channels}
num_layers: 2
dropout: 0.0
#heads: 4
#act: torch.nn.Identity()
norm: BatchNorm
routes:
- - [1, 1]
- adjacency
- - [0, 1]
- cbdry
- - [2, 1]
- bdry
layers: 2
use_edge_attr: false
activation: relu

backbone_wrapper:
_target_: topobenchmarkx.nn.wrappers.combinatorial.TuneWrapper
_partial_: true
wrapper_name: TuneWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_num_cell_dimensions:${oc.select:model.feature_encoder.selected_dimensions,null},${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.nn.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Options: PropagateSignalDown, NoReadOut (use NoReadOut when no readout is needed)
num_cell_dimensions: ${infere_num_cell_dimensions:${oc.select:model.feature_encoder.selected_dimensions,null},${model.feature_encoder.in_channels}} # The highest order of cell dimensions to consider
hidden_dim: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

# compile model for faster training with pytorch 2.0
compile: false
57 changes: 57 additions & 0 deletions configs/model/cell/topotune_onehasse.yaml
@@ -0,0 +1,57 @@
_target_: topobenchmarkx.model.TBXModel

model_name: topotune_onehasse
model_domain: cell
tune_gnn: IdentityGCN

feature_encoder:
_target_: topobenchmarkx.nn.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset},${oc.select:transforms,null}}
out_channels: 32
proj_dropout: 0.
selected_dimensions:
- 0
- 1
- 2

backbone:
_target_: topobenchmarkx.nn.backbones.combinatorial.gccn_onehasse.TopoTune_OneHasse
GNN:
_target_: torch_geometric.nn.models.${model.tune_gnn}
# _target_: topobenchmarkx.nn.backbones.graph.${model.tune_gnn}
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${model.feature_encoder.out_channels}
hidden_channels: ${model.feature_encoder.out_channels}
num_layers: 2
dropout: 0.0
norm: BatchNorm
routes:
- - [1, 1]
- adjacency
- - [0, 1]
- cbdry
- - [2, 1]
- bdry
layers: 2
use_edge_attr: false
activation: relu

backbone_wrapper:
_target_: topobenchmarkx.nn.wrappers.combinatorial.TuneWrapper
_partial_: true
wrapper_name: TuneWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_num_cell_dimensions:${oc.select:model.feature_encoder.selected_dimensions,null},${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.nn.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Options: PropagateSignalDown, NoReadOut (use NoReadOut when no readout is needed)
num_cell_dimensions: ${infere_num_cell_dimensions:${oc.select:model.feature_encoder.selected_dimensions,null},${model.feature_encoder.in_channels}} # The highest order of cell dimensions to consider
hidden_dim: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

# compile model for faster training with pytorch 2.0
compile: false
59 changes: 59 additions & 0 deletions configs/model/simplicial/topotune.yaml
@@ -0,0 +1,59 @@
_target_: topobenchmarkx.model.TBXModel

model_name: topotune
model_domain: simplicial
tune_gnn: IdentityGIN

feature_encoder:
_target_: topobenchmarkx.nn.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset},${oc.select:transforms,null}}
out_channels: 32
proj_dropout: 0.
selected_dimensions:
- 0
- 1
- 2

backbone:
_target_: topobenchmarkx.nn.backbones.combinatorial.gccn.TopoTune
GNN:
_target_: torch_geometric.nn.models.${model.tune_gnn}
# _target_: topobenchmarkx.nn.backbones.graph.${model.tune_gnn}
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${model.feature_encoder.out_channels}
hidden_channels: ${model.feature_encoder.out_channels}
num_layers: 2
dropout: 0.0
#heads: 4
#act: torch.nn.Identity()
norm: BatchNorm
routes:
- - [1, 1]
- adjacency
- - [0, 1]
- cbdry
- - [2, 1]
- bdry
layers: 2
use_edge_attr: false
activation: relu

backbone_wrapper:
_target_: topobenchmarkx.nn.wrappers.combinatorial.TuneWrapper
_partial_: true
wrapper_name: TuneWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_num_cell_dimensions:${oc.select:model.feature_encoder.selected_dimensions,null},${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.nn.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Options: PropagateSignalDown, NoReadOut (use NoReadOut when no readout is needed)
num_cell_dimensions: ${infere_num_cell_dimensions:${oc.select:model.feature_encoder.selected_dimensions,null},${model.feature_encoder.in_channels}} # The highest order of cell dimensions to consider
hidden_dim: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

# compile model for faster training with pytorch 2.0
compile: false
57 changes: 57 additions & 0 deletions configs/model/simplicial/topotune_onehasse.yaml
@@ -0,0 +1,57 @@
_target_: topobenchmarkx.model.TBXModel

model_name: topotune_onehasse
model_domain: simplicial
tune_gnn: GCN

feature_encoder:
_target_: topobenchmarkx.nn.encoders.${model.feature_encoder.encoder_name}
encoder_name: AllCellFeatureEncoder
in_channels: ${infer_in_channels:${dataset},${oc.select:transforms,null}}
out_channels: 32
proj_dropout: 0.
selected_dimensions:
- 0
- 1
- 2

backbone:
_target_: topobenchmarkx.nn.backbones.combinatorial.gccn_onehasse.TopoTune_OneHasse
GNN:
_target_: torch_geometric.nn.models.${model.tune_gnn}
# _target_: topobenchmarkx.nn.backbones.graph.${model.tune_gnn}
in_channels: ${model.feature_encoder.out_channels}
out_channels: ${model.feature_encoder.out_channels}
hidden_channels: ${model.feature_encoder.out_channels}
num_layers: 2
dropout: 0.0
norm: BatchNorm
routes:
- - [1, 1]
- adjacency
- - [0, 1]
- cbdry
- - [2, 1]
- bdry
layers: 2
use_edge_attr: false
activation: relu

backbone_wrapper:
_target_: topobenchmarkx.nn.wrappers.combinatorial.TuneWrapper
_partial_: true
wrapper_name: TuneWrapper
out_channels: ${model.feature_encoder.out_channels}
num_cell_dimensions: ${infere_num_cell_dimensions:${oc.select:model.feature_encoder.selected_dimensions,null},${model.feature_encoder.in_channels}}

readout:
_target_: topobenchmarkx.nn.readouts.${model.readout.readout_name}
readout_name: PropagateSignalDown # Options: PropagateSignalDown, NoReadOut (use NoReadOut when no readout is needed)
num_cell_dimensions: ${infere_num_cell_dimensions:${oc.select:model.feature_encoder.selected_dimensions,null},${model.feature_encoder.in_channels}} # The highest order of cell dimensions to consider
hidden_dim: ${model.feature_encoder.out_channels}
out_channels: ${dataset.parameters.num_classes}
task_level: ${dataset.parameters.task_level}
pooling_type: sum

# compile model for faster training with pytorch 2.0
compile: false
4 changes: 2 additions & 2 deletions configs/run.yaml
@@ -4,8 +4,8 @@
# order of defaults determines the order in which configs override each other
defaults:
- _self_
- dataset: graph/NCI1
- model: graph/gat
- dataset: graph/PROTEINS
- model: simplicial/topotune_onehasse
- transforms: ${get_default_transform:${dataset},${model}} #no_transform
- optimizer: default
- loss: default
2 changes: 1 addition & 1 deletion configs/transforms/liftings/graph2cell/cycle.yaml
@@ -2,5 +2,5 @@ _target_: topobenchmarkx.transforms.data_transform.DataTransform
transform_type: 'lifting'
transform_name: "CellCycleLifting"
complex_dim: ${oc.select:dataset.parameters.max_dim_if_lifted,3}
max_cell_length: 10
max_cell_length: 18
preserve_edge_attr: ${oc.select:dataset.parameters.preserve_edge_attr_if_lifted,False}
4 changes: 2 additions & 2 deletions env_setup.sh
@@ -5,11 +5,11 @@ pip install -e '.[all]'
# Note that not all combinations of torch and CUDA are available
# See https://github.com/pyg-team/pyg-lib to check the configuration that works for you
TORCH="2.3.0" # available options: 1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.2.0, or 2.3.0
CUDA="cu121" # if available, select the CUDA version suitable for your system
CUDA="cu118" # if available, select the CUDA version suitable for your system
# available options: cpu, cu102, cu113, cu116, cu117, cu118, or cu121
pip install torch==${TORCH} --extra-index-url https://download.pytorch.org/whl/${CUDA}
pip install pyg-lib torch-scatter torch-sparse torch-cluster -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html

pytest
#pytest

pre-commit install
8 changes: 4 additions & 4 deletions pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "TopoBenchmarkX"
name = "TopoTuneBenchmarkX"
dynamic = ["version"]
authors = [
{name = "PyT-Team Authors", email = "tlscabinet@gmail.com"}
@@ -51,7 +51,7 @@ dependencies=[
"toponetx @ git+https://github.com/pyt-team/TopoNetX.git",
"topomodelx @ git+https://github.com/pyt-team/TopoModelX.git",
"topoembedx @ git+https://github.com/pyt-team/TopoEmbedX.git",
"lightning",
"lightning==2.2.5",
]

[project.optional-dependencies]
@@ -77,8 +77,8 @@ test = [
"pytest-mock"
]

dev = ["TopoBenchmarkX[test, lint]"]
all = ["TopoBenchmarkX[dev, doc]"]
dev = ["TopoTuneBenchmarkX[test, lint]"]
all = ["TopoTuneBenchmarkX[dev, doc]"]

[project.urls]
homepage="https://github.com/pyt-team/TopoBenchmarkX"
Binary file added resources/gccn.jpg