Implement ModularDNN model #235

Merged · 113 commits merged Dec 12, 2023

Commits
966b9c9
Change order of concatenation and subtraction
stefan-apollo Nov 28, 2023
ac7367e
Changed in both functions
stefan-apollo Nov 28, 2023
790752c
Implemented new basis November A (previously called new norm), to be
stefan-apollo Nov 28, 2023
63bdc1c
Hacky implementation of new edges formula
stefan-apollo Nov 28, 2023
491fb17
Refactored integradted_gradient_jacobian and edge_norm
stefan-apollo Nov 29, 2023
7b75387
Adjusted tests because new integrated gradient function includes the …
stefan-apollo Nov 29, 2023
dff29ef
Merge remote-tracking branch 'origin/main' into feature/support_diffe…
stefan-apollo Nov 29, 2023
5367ddc
Change default attribution; better docs; move minus sign
stefan-apollo Nov 29, 2023
e3d550f
MNIST bugfix
stefan-apollo Nov 29, 2023
c389d5f
Implemented new edges formula for MLPs/MNIST
stefan-apollo Nov 29, 2023
dbe9aed
Fixed MNIST for real this time
stefan-apollo Nov 29, 2023
eb2cf99
Update .vscode
stefan-apollo Nov 29, 2023
0a4d35f
Comment for test
stefan-apollo Nov 29, 2023
f375170
Pass through arguments, add docs
stefan-apollo Nov 29, 2023
88d3dbb
Merge remote-tracking branch 'origin/main' into feature/support_diffe…
stefan-apollo Nov 29, 2023
a953ccf
Allow for config options to select new features
stefan-apollo Nov 29, 2023
d2461e5
Add run build tests for new methods (CI slow-down?)
stefan-apollo Nov 29, 2023
e505964
Implemented tests and fixed input_pos != out_pos
stefan-apollo Nov 29, 2023
4a06b76
Added all test combinations
stefan-apollo Nov 30, 2023
0b02a66
Skip fp32 Cs test for new basis
stefan-apollo Nov 30, 2023
c247fa3
Moved module_hat to linalg
stefan-apollo Nov 30, 2023
2aca8f9
Removed integral_boundary_relative_epsilon from everywhere
stefan-apollo Nov 30, 2023
ff50a6d
Remove unneccessary device moving
stefan-apollo Nov 30, 2023
889eb30
Separated functions; also fixed normalization
stefan-apollo Nov 30, 2023
846ac7d
Removed stray copilot kwarg
stefan-apollo Nov 30, 2023
3478830
Fix tests
stefan-apollo Nov 30, 2023
a56dcb8
Rename ig_formula -> basis_formula
stefan-apollo Nov 30, 2023
124588a
Finish rename
stefan-apollo Nov 30, 2023
d4d8de1
Fixed tests
stefan-apollo Nov 30, 2023
6ccfacf
Added config entries for mnist
stefan-apollo Nov 30, 2023
6282cf0
Move module_hat for easier diff / no functional change
stefan-apollo Nov 30, 2023
2edaf90
Fix docstrings
stefan-apollo Nov 30, 2023
4312ea2
Move einsum line
stefan-apollo Nov 30, 2023
9b39da3
Write einsum pattern directly
stefan-apollo Nov 30, 2023
c32f3b3
Parameterize tests
stefan-apollo Nov 30, 2023
3f85669
renamed test
stefan-apollo Nov 30, 2023
448929c
Improved docs
stefan-apollo Nov 30, 2023
38f7b0d
Merge remote-tracking branch 'origin/main' into feature/support_diffe…
stefan-apollo Dec 1, 2023
de20c74
Remove duplicate normalization
danbraunai-apollo Dec 1, 2023
eabc54f
Remove variable_position_dimension variable everywhere
danbraunai-apollo Dec 1, 2023
664ab73
Fix accidental deletion of i_grad from einsum
danbraunai-apollo Dec 1, 2023
65636fb
Simplify edge hook function
danbraunai-apollo Dec 1, 2023
d6270ce
Reduce mnist dataset size in tests
danbraunai-apollo Dec 1, 2023
c2cd73e
Tests for rotate final layer
stefan-apollo Dec 1, 2023
0523dab
Mod add tests too slow
stefan-apollo Dec 1, 2023
4625209
Stray line in tests
stefan-apollo Dec 1, 2023
0ef4bc4
Test naming
stefan-apollo Dec 1, 2023
e64dd63
Remove all mod add tests
stefan-apollo Dec 1, 2023
eeef53b
Implement ModularDNN following Jakes script
stefan-apollo Dec 1, 2023
65a79cc
Merge remote-tracking branch 'origin/main' into feature/implement_mod…
stefan-apollo Dec 1, 2023
48aa4bd
Get it running
stefan-apollo Dec 1, 2023
8478ae8
Clarified docs
stefan-apollo Dec 1, 2023
92981a5
Working plotting script
stefan-apollo Dec 1, 2023
3181f0b
Merge remote-tracking branch 'origin/main' into feature/implement_mod…
stefan-apollo Dec 4, 2023
02bba38
SVD for mod dnn
stefan-apollo Dec 4, 2023
3446703
mlp svd
stefan-apollo Dec 4, 2023
c0ecec6
Merge remote-tracking branch 'origin/main' into feature/implement_mod…
stefan-apollo Dec 4, 2023
20464bc
Adjusted for merge
stefan-apollo Dec 4, 2023
c31b390
Custom plot
stefan-apollo Dec 4, 2023
f41d285
Configs
stefan-apollo Dec 4, 2023
e7a7b6d
Refactor
stefan-apollo Dec 4, 2023
ef8e463
Small fixes
stefan-apollo Dec 4, 2023
3fcbe88
Minor change
stefan-apollo Dec 4, 2023
b9da8ab
Jake mapping
stefan-apollo Dec 4, 2023
7a79796
Neuron basis
stefan-apollo Dec 4, 2023
0d889ae
Test linear
stefan-apollo Dec 4, 2023
4c01b54
Implement Identity act fn@
stefan-apollo Dec 4, 2023
fc81052
Implemented diagonal edges test, and DNN rotation invariance text
stefan-apollo Dec 4, 2023
b8229cf
Notebook changes
stefan-apollo Dec 4, 2023
dc0da16
Fix dtype error
stefan-apollo Dec 8, 2023
c7c2240
Merge remote-tracking branch 'origin/main' into feature/implement_mod…
stefan-apollo Dec 8, 2023
d321709
Tests for dnn build (find e.g. dtype issue)
stefan-apollo Dec 8, 2023
e1f41a5
Fix settings
stefan-apollo Dec 8, 2023
8828586
Fix plotting file
stefan-apollo Dec 8, 2023
3590a80
Rename to BlockDiagonalDNN
stefan-apollo Dec 8, 2023
01e9ebf
Rename to BlockDiagonalDNN
stefan-apollo Dec 8, 2023
e4a7096
Revert "Rename to BlockDiagonalDNN"
stefan-apollo Dec 8, 2023
a5bab35
Revert "Rename to BlockDiagonalDNN"
stefan-apollo Dec 8, 2023
c56ec52
isort
stefan-apollo Dec 8, 2023
83e752e
Init
stefan-apollo Dec 8, 2023
4228fc6
typing
stefan-apollo Dec 8, 2023
19206e2
Random commit to restart CI
stefan-apollo Dec 8, 2023
e501ac8
Re-organise modular mlp
danbraunai-apollo Dec 11, 2023
0284b57
Merge modular_mlp_build with mlp_rib_build
danbraunai-apollo Dec 11, 2023
b70459a
Replicate old results in mlp_rib_build
danbraunai-apollo Dec 11, 2023
5e53560
Fix configs
danbraunai-apollo Dec 11, 2023
bde5077
Remove modular_mlp_build experiment
danbraunai-apollo Dec 11, 2023
4242d5f
Fix vscode settings and launch profile
danbraunai-apollo Dec 11, 2023
5d5fa86
Remove svd block diagonal config
danbraunai-apollo Dec 11, 2023
a196914
Remove duplicate type
danbraunai-apollo Dec 11, 2023
4553338
Don't fold bias inside ModularMLP.__init__
danbraunai-apollo Dec 12, 2023
cdbea96
Change MLPConfig dtype from str to torch.dtype
danbraunai-apollo Dec 12, 2023
d789202
Use torch.full instead of scalar * torch.ones
danbraunai-apollo Dec 12, 2023
cebf9af
Use validato in ModularMLPConfig
danbraunai-apollo Dec 12, 2023
fdb21c0
Add serializer and validator to MLP dtype in config
danbraunai-apollo Dec 12, 2023
2849565
Use torch.block_diag()
danbraunai-apollo Dec 12, 2023
8e2099f
Remove unnecessary path updating
danbraunai-apollo Dec 12, 2023
0c21bd6
Remove mark slow from test_modular_mlp_build_graph
danbraunai-apollo Dec 12, 2023
3a18054
Put test_lambdas logic in graph_build_test
danbraunai-apollo Dec 12, 2023
32e7725
Use logger instead of print
danbraunai-apollo Dec 12, 2023
5589d67
Always output model config in mlp_rib_build
danbraunai-apollo Dec 12, 2023
7e8cd8f
Fix validators in first_block fields in config
danbraunai-apollo Dec 12, 2023
3e69ed8
Update BlockVectorDataset.generate_data docstring
danbraunai-apollo Dec 12, 2023
73bc305
Ensure that U is stored for every layer
danbraunai-apollo Dec 12, 2023
bae23b3
Clean up ModularMLPConfig
danbraunai-apollo Dec 12, 2023
1966be0
Give warnings about reproducibility
danbraunai-apollo Dec 12, 2023
c6de03a
Clean up test_modular_mlp_diagonal_edges_when_linear
danbraunai-apollo Dec 12, 2023
c52b9ff
Clean comments in test_modular_mlp_diagonal_edges_when_linear
danbraunai-apollo Dec 12, 2023
9b79a59
Merge main into feature/implement_modular_dnn
danbraunai-apollo Dec 12, 2023
0c4b814
Add missing import
danbraunai-apollo Dec 12, 2023
1ae3103
Fix long docstring
danbraunai-apollo Dec 12, 2023
c5e0918
Prevent identical layers in modular mlp
danbraunai-apollo Dec 12, 2023
de514f2
Fix MLPConfig loading
danbraunai-apollo Dec 12, 2023
9 changes: 9 additions & 0 deletions .vscode/launch.json
@@ -94,6 +94,15 @@
"console": "integratedTerminal",
"justMyCode": true
},
{
"name": "modular mlp rib build",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/experiments/mlp_rib_build/run_mlp_rib_build.py",
"args": "${workspaceFolder}/experiments/mlp_rib_build/block_diagonal.yaml",
"console": "integratedTerminal",
"justMyCode": true
},
{
"name": "train modadd",
"type": "python",
5 changes: 3 additions & 2 deletions experiments/lm_rib_build/run_lm_rib_build.py
@@ -133,10 +133,11 @@ class Config(BaseModel):
description="The type of evaluation to perform on the model before building the graph."
"If None, skip evaluation.",
)
basis_formula: Literal["(1-alpha)^2", "(1-0)*alpha", "svd"] = Field(
basis_formula: Literal["(1-alpha)^2", "(1-0)*alpha", "svd", "neuron"] = Field(
"(1-0)*alpha",
description="The integrated gradient formula to use to calculate the basis. If 'svd', will"
"use Us as Cs, giving the eigendecomposition of the gram matrix.",
"use Us as Cs, giving the eigendecomposition of the gram matrix. If 'neuron', will use "
"the neuron-basis.",
)
edge_formula: Literal["functional", "squared"] = Field(
"functional",
28 changes: 28 additions & 0 deletions experiments/mlp_rib_build/block_diagonal.yaml
@@ -0,0 +1,28 @@
exp_name: block_diagonal
node_layers:
- layers.0
- layers.1
- layers.2
- layers.3
- layers.4
- output
dataset:
name: block_vector
size: 10000
length: 4
data_variances: [1, 1]
data_perfect_correlation: false
seed: 0
modular_mlp_config:
n_hidden_layers: 4
width: 4
weight_variances: [2, 2]
weight_equal_columns: false
bias: 0
seed: 0
dtype: float64
batch_size: 256
n_intervals: 0
truncation_threshold: 1e-15
rotate_final_node_layer: false
basis_formula: (1-0)*alpha
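
For orientation, here is a minimal sketch of running the build with this config, mirroring the "modular mlp rib build" launch profile added to .vscode/launch.json above. The import path and the returned keys are assumptions inferred from this diff, not something the PR itself documents.

    # Minimal sketch, assuming the repo root is on PYTHONPATH and the entry point matches
    # the launch profile above (experiments/mlp_rib_build/run_mlp_rib_build.py).
    from experiments.mlp_rib_build.run_mlp_rib_build import main

    # Equivalent to the CLI call used in the launch profile:
    #   python experiments/mlp_rib_build/run_mlp_rib_build.py experiments/mlp_rib_build/block_diagonal.yaml
    results = main("experiments/mlp_rib_build/block_diagonal.yaml", force=True)
    # The results dict (see run_mlp_rib_build.py below) should contain entries such as
    # "edges" and "config".
    print(list(results.keys()))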
37 changes: 31 additions & 6 deletions experiments/mlp_rib_build/plot_mlp_graph.py
@@ -1,41 +1,66 @@
"""Plot an interaction graph given a results file contain the graph edges.

# TODO: Merge with experiments/lm_rib_build/plot_lm_graph.py
Usage:
python plot_mlp_graph.py <path/to/results_pt_file>

The results_pt_file should be the output of the run_mlp_rib_build.py script.
"""
import csv
from pathlib import Path
from typing import Optional, Union

import fire
import torch

from rib.log import logger
from rib.plotting import plot_interaction_graph
from rib.utils import check_outfile_overwrite


def main(results_file: str, force: bool = True) -> None:
def main(
results_file: str,
nodes_per_layer: Optional[Union[int, list[int]]] = None,
labels_file: Optional[str] = None,
out_file: Optional[Union[str, Path]] = None,
force: bool = False,
) -> None:
"""Plot an interaction graph given a results file contain the graph edges."""
results = torch.load(results_file)
out_dir = Path(__file__).parent / "out"
out_file = out_dir / f"{results['exp_name']}_rib_graph.png"
if out_file is None:
out_file = out_dir / f"{results['exp_name']}_rib_graph.png"
else:
out_file = Path(out_file)

# Input layer is much larger for mnist so include more nodes in it
nodes_per_layer = [40, 10, 10, 10] if nodes_per_layer is None else nodes_per_layer

if not check_outfile_overwrite(out_file, force):
return

# Input layer is much larger so include more nodes in it
nodes_per_layer = [40, 10, 10, 10]
# Ensure that we have edges
assert results["edges"], "The results file does not contain any edges."

layer_names = results["config"]["node_layers"] + ["output"]
# Add labels if provided
if labels_file is not None:
with open(labels_file, "r", newline="") as file:
reader = csv.reader(file)
node_labels = list(reader)
else:
node_labels = None

plot_interaction_graph(
raw_edges=results["edges"],
layer_names=layer_names,
layer_names=results["config"]["node_layers"],
exp_name=results["exp_name"],
nodes_per_layer=nodes_per_layer,
out_file=out_file,
node_labels=node_labels,
)

logger.info("Saved plot to %s", out_file)


if __name__ == "__main__":
fire.Fire(main)
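
A hedged usage sketch for the extended plotting entry point above: since the script exposes main via fire, it can also be called directly from Python. The import path and file names below are illustrative assumptions, not taken from the repo.

    # Sketch: call the plotting entry point directly (equivalent to the fire CLI shown in
    # the module docstring).
    from experiments.mlp_rib_build.plot_mlp_graph import main as plot_main

    plot_main(
        results_file="out/block_diagonal_rib_graph.pt",
        nodes_per_layer=[40, 10, 10, 10],  # an int or a per-layer list (see the signature above)
        labels_file=None,                  # optional CSV of node labels
        out_file="out/block_diagonal_rib_graph.png",
        force=True,                        # overwrite an existing plot without prompting
    )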
55 changes: 41 additions & 14 deletions experiments/mlp_rib_build/run_mlp_rib_build.py
@@ -14,42 +14,48 @@

"""

import json
from dataclasses import asdict
from pathlib import Path
from typing import Literal, Optional, Union

import fire
import torch
import yaml
from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, model_validator
from torch.utils.data import DataLoader

from rib.data import VisionDatasetConfig
from rib.data import BlockVectorDatasetConfig, VisionDatasetConfig
from rib.data_accumulator import collect_gram_matrices, collect_interaction_edges
from rib.hook_manager import HookedModel
from rib.interaction_algos import calculate_interaction_rotations
from rib.loader import load_dataset, load_mlp
from rib.log import logger
from rib.models.mlp import MLPConfig
from rib.models.modular_mlp import ModularMLPConfig
from rib.types import TORCH_DTYPES, RibBuildResults, RootPath, StrDtype
from rib.utils import check_outfile_overwrite, load_config, set_seed


class Config(BaseModel):
model_config = ConfigDict(extra="forbid", frozen=True)
exp_name: str
mlp_path: RootPath
mlp_path: Optional[RootPath] = Field(
None,
description="Path to the saved MLP model. If None, we expect the MLP class to not be "
"randomly initialized (e.g. like in the ModularMLP class).",
)
batch_size: int
seed: Optional[int] = 0
truncation_threshold: float # Remove eigenvectors with eigenvalues below this threshold.
rotate_final_node_layer: bool # Whether to rotate the output layer to its eigenbasis.
n_intervals: int # The number of intervals to use for integrated gradients.
dtype: StrDtype # Data type of all tensors (except those overriden in certain functions).
node_layers: list[str]
basis_formula: Literal["(1-alpha)^2", "(1-0)*alpha"] = Field(
basis_formula: Literal["(1-alpha)^2", "(1-0)*alpha", "svd", "neuron"] = Field(
"(1-0)*alpha",
description="The integrated gradient formula to use to calculate the basis.",
description="The integrated gradient formula to use to calculate the basis. If 'svd', will"
"use Us as Cs, giving the eigendecomposition of the gram matrix. If 'neuron', will use "
"the neuron-basis.",
)
edge_formula: Literal["functional", "squared"] = Field(
"functional",
@@ -60,27 +66,48 @@ class Config(BaseModel):
description="Directory for the output files. Defaults to `./out/`. If None, no output "
"is written. If a relative path, it is relative to the root of the rib repo.",
)
dataset: VisionDatasetConfig = VisionDatasetConfig()
dataset: Union[VisionDatasetConfig, BlockVectorDatasetConfig] = Field(
VisionDatasetConfig(),
description="The dataset to use to build the graph.",
)
modular_mlp_config: Optional[ModularMLPConfig] = Field(
None,
description="The model to use. If None, we expect mlp_path to be set.",
)

@model_validator(mode="after")
def verify_model_config(self) -> "Config":
"""Verify that model_config is set if modular_mlp_config is not."""
if self.mlp_path is None and self.modular_mlp_config is None:
raise ValueError("model must be set if modular_mlp_config is not.")
return self


def main(config_path_or_obj: Union[str, Config], force: bool = False) -> RibBuildResults:
"""Implement the main algorithm and store the graph to disk."""
config = load_config(config_path_or_obj, config_model=Config)
set_seed(config.seed)

with open(config.mlp_path.parent / "config.yaml", "r") as f:
model_config_dict = yaml.safe_load(f)

if config.out_dir is not None:
config.out_dir.mkdir(parents=True, exist_ok=True)
out_file = config.out_dir / f"{config.exp_name}_rib_graph.pt"
if not check_outfile_overwrite(out_file, force):
raise FileExistsError("Not overwriting output file")

mlp_config: Union[MLPConfig, ModularMLPConfig]
if config.mlp_path is not None:
with open(config.mlp_path.parent / "config.yaml", "r") as f:
model_config_dict = yaml.safe_load(f)
mlp_config = MLPConfig(**model_config_dict["model"])
else:
assert config.modular_mlp_config is not None
mlp_config = config.modular_mlp_config

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = TORCH_DTYPES[config.dtype]
mlp_config = MLPConfig(**model_config_dict["model"])
mlp = load_mlp(mlp_config, config.mlp_path, fold_bias=True, device=device)
mlp = load_mlp(
mlp_config, mlp_path=config.mlp_path, fold_bias=True, device=device, seed=config.seed
)
assert mlp.has_folded_bias, "MLP must have folded bias to run RIB"

all_possible_node_layers = [f"layers.{i}" for i in range(len(mlp.layers))] + ["output"]
@@ -151,8 +178,8 @@ def main(config_path_or_obj: Union[str, Config], force: bool = False) -> RibBuildResults:
"interaction_rotations": interaction_rotations,
"eigenvectors": eigenvectors,
"edges": [(module, E_hats[module].cpu()) for module in E_hats],
"config": json.loads(config.model_dump_json()),
"model_config_dict": model_config_dict,
"config": config.model_dump(),
"model_config_dict": mlp_config.model_dump(),
}

# Save the results (which include torch tensors) to file
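
A minimal sketch of how the new mlp_path / modular_mlp_config validator above behaves. Field values other than those visible in this diff are illustrative, and the import path is an assumption.

    # Sketch: the model_validator requires at least one of mlp_path / modular_mlp_config.
    from pydantic import ValidationError

    from experiments.mlp_rib_build.run_mlp_rib_build import Config  # assumed import path

    try:
        Config(
            exp_name="example",
            mlp_path=None,
            modular_mlp_config=None,  # neither model source given -> validator rejects
            batch_size=256,
            truncation_threshold=1e-15,
            rotate_final_node_layer=False,
            n_intervals=0,
            dtype="float64",
            node_layers=["layers.0", "output"],
        )
    except ValidationError as err:
        print(err)  # reports that mlp_path must be set if modular_mlp_config is not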
112 changes: 109 additions & 3 deletions rib/data.py
@@ -2,10 +2,20 @@
from typing import Literal, Optional

import torch
from jaxtyping import Int
from pydantic import BaseModel, ConfigDict, Field, model_validator
from jaxtyping import Float, Int
from pydantic import (
BaseModel,
ConfigDict,
Field,
ValidationInfo,
field_validator,
model_validator,
)
from torch import Tensor
from torch.utils.data import Dataset
from typing_extensions import Annotated

from rib.types import TORCH_DTYPES, StrDtype


class DatasetConfig(BaseModel):
@@ -121,8 +131,104 @@ def __len__(self) -> int:


class VisionDatasetConfig(DatasetConfig):
source: Literal["custom"] = "custom"
name: Literal["CIFAR10", "MNIST"] = "MNIST"
seed: Optional[int] = 0
return_set_frac: Optional[float] = None # Needed for some reason to avoid mypy errors
return_set_n_samples: Optional[int] = None # Needed for some reason to avoid mypy errors


class BlockVectorDatasetConfig(DatasetConfig):
name: Literal["block_vector"] = "block_vector"
size: int = Field(
1000,
description="Number of samples in the dataset.",
)
length: int = Field(
4,
description="Length of each vector.",
)
first_block_length: Optional[int] = Field(
None,
description="Length of the first block. If None, defaults to length // 2.",
validate_default=True,
)
data_variances: list[float] = Field(
Collaborator: nit: better for this to be a tuple since it must be len 2

Contributor: Agreed. However, tuples don't seem to be supported by default in yaml.

You could add a validator like this:

    @field_validator("data_variances", mode="after")
    @classmethod
    def check_data_variances_len_two(cls, v: list[float]) -> list[float]:
        if len(v) != 2:
            raise ValueError("data_variances must be a list of length 2.")
        return v

Or one that converts the list to a tuple and ensures that it has length two. Haven't made either of these changes because there is a tonne of validation that we haven't really bothered to do in these configs; I don't think it's very high priority as most of them should just raise errors later.

Collaborator: oh yeah, that's fine then.

[1.0, 1.0],
description="Variance of the two blocks of the vectors.",
)
data_perfect_correlation: bool = Field(
False,
description="Whether to make the data within each block perfectly correlated.",
)
dtype: StrDtype = "float64"
seed: Optional[int] = 0

@field_validator("first_block_length", mode="after")
@classmethod
def set_first_block_length(cls, v: Optional[int], info: ValidationInfo) -> int:
if v is None:
return info.data["length"] // 2
return v


class BlockVectorDataset(Dataset):
def __init__(
self,
dataset_config: BlockVectorDatasetConfig,
):
"""Generate a dataset of random normal vectors.

The components in `[:first_block_length]` have variance `data_variances[0]`, while the
components in `[first_block_length:length]` have variance `data_variances[1]`.
If `data_perfect_correlation` is true, the entries in each block are identical. Otherwise
they have no correlation.
"""
self.cfg = dataset_config
self.data = self.generate_data()
# Not needed, just here for Dataset class
self.labels = torch.nan * torch.ones(self.cfg.size)

def __len__(self):
return self.cfg.size

def __getitem__(self, idx):
return self.data[idx], self.labels[idx]

def generate_data(self) -> Float[Tensor, "size length"]:
"""Generate a dataset of vectors with two blocks of variance.

Warning, changing the structure of this function may break reproducibility.

Returns:
A dataset of vectors with two blocks of variance.
"""
dtype = TORCH_DTYPES[self.cfg.dtype]
size = self.cfg.size
length = self.cfg.length
first_block_length = self.cfg.first_block_length
data_variances = self.cfg.data_variances
data_perfect_correlation = self.cfg.data_perfect_correlation

first_block_length = first_block_length or length // 2
second_block_length = length - first_block_length
data = torch.empty((size, length), dtype=dtype)

if self.cfg.seed is not None:
torch.manual_seed(self.cfg.seed)

if not data_perfect_correlation:
data[:, 0:first_block_length] = data_variances[0] * torch.randn(
size, first_block_length, dtype=dtype
)
data[:, first_block_length:] = data_variances[1] * torch.randn(
size, second_block_length, dtype=dtype
)
else:
data[:, 0:first_block_length] = data_variances[0] * torch.randn(
size, 1, dtype=dtype
).repeat(1, first_block_length)
data[:, first_block_length:] = data_variances[1] * torch.randn(
size, 1, dtype=dtype
).repeat(1, second_block_length)

return data
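
To make the pieces above concrete, a small usage sketch of the new dataset follows; the values mirror block_diagonal.yaml earlier in this PR, and the import path from rib.data is assumed from this diff.

    # Sketch: build the block-vector dataset used by the block_diagonal.yaml config above.
    from torch.utils.data import DataLoader

    from rib.data import BlockVectorDataset, BlockVectorDatasetConfig  # assumed import path

    cfg = BlockVectorDatasetConfig(
        size=10000,
        length=4,
        data_variances=[1.0, 1.0],
        data_perfect_correlation=False,
        dtype="float64",
        seed=0,
    )
    dataset = BlockVectorDataset(cfg)
    x, y = dataset[0]
    print(x.shape)  # torch.Size([4]); y is a NaN placeholder label

    loader = DataLoader(dataset, batch_size=256, shuffle=False)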