pytorch · vmoens · Nov 8, 2024 · Jul 22, 2024 · Jul 23, 2024 · Aug 3, 2024
diff --git a/docs/source/_static/img/mcts_forest.png b/docs/source/_static/img/mcts_forest.png
diff --git a/docs/source/reference/data.rst b/docs/source/reference/data.rst
@@ -975,7 +975,72 @@ The following classes are deprecated and just point to the classes above:
 Trees and Forests
 -----------------
 
-TorchRL offers a set of classes and functions that can be used to represent trees and forests efficiently.
+TorchRL offers a set of classes and functions that can be used to represent trees and forests efficiently,
+which is particularly useful for Monte Carlo Tree Search (MCTS) algorithms.
+
+TensorDictMap
+~~~~~~~~~~~~~
+
+At its core, the MCTS API relies on the :class:`~torchrl.data.TensorDictMap` which acts like a storage where indices can
+be any numerical object. In traditional storages (e.g., :class:`~torchrl.data.TensorStorage`), only integer indices
+are allowed:
+
+    >>> storage = TensorStorage(...)
+    >>> data = storage[3]
+
+:class:`~torchrl.data.TensorDictMap` allows us to make more advanced queries in the storage. The typical example is
+when we have a storage containing a set of MDPs and we want to rebuild a trajectory given its initial
+``(observation, action)`` pair. In tensor terms, this could be written with the following pseudocode:
+
+    >>> next_state = storage[observation, action]
+
+(if there is more than one next state associated with this pair one could return a stack of ``next_states`` instead).
+This API would make sense but it would be restrictive: allowing observations or actions that are composed of
+multiple tensors may be hard to implement. Instead, we provide a tensordict containing these values and let the storage
+know what ``in_keys`` to look at to query the next state:
+
+    >>> td = TensorDict(observation=observation, action=action)
+    >>> next_td = storage[td]
+
+Of course, this class also allows us to extend the storage with new data:
+
+    >>> storage[td] = next_state
+
+This comes in handy because it allows us to represent complex rollout structures where different actions are undertaken
+at a given node (i.e., for a given observation). Every ``(observation, action)`` pair that has been observed may lead us
+to a rollout (or a set of rollouts) that we can use further.
+
+MCTSForest
+~~~~~~~~~~
+
+Building a tree from an initial observation then becomes just a matter of organizing data efficiently.
+The :class:`~torchrl.data.MCTSForest` has at its core two storages: a first storage links observations to hashes and
+indices of actions encountered in the past in the dataset:
+
+    >>> data = TensorDict(observation=observation)
+    >>> metadata = forest.node_map[data]
+    >>> index = metadata["_index"]
+
+where ``forest`` is a :class:`~torchrl.data.MCTSForest` instance.
+Then, a second storage keeps track of the actions and results associated with the observation:
+
+    >>> next_data = forest.data_map[index]
+
+The ``next_data`` entry can have any shape, but it will usually match the shape of ``index`` (since each index
+corresponds to one action). Once ``next_data`` is obtained, it can be put together with ``data`` to form a set of nodes,
+and the tree can be expanded for each of these. The following figure shows how this is done.
+
+.. figure:: /_static/img/mcts_forest.png
+
+    Building a :class:`~torchrl.data.Tree` from a :class:`~torchrl.data.MCTSForest` object.
+    The flowchart represents a tree being built from an initial observation ``o``. The :meth:`~torchrl.data.MCTSForest.get_tree`
+    method passes the input data structure (the root node) to the ``node_map`` :class:`~torchrl.data.TensorDictMap` instance
+    that returns a set of hashes and indices. These indices are then used to query the corresponding tuples of
+    actions, next observations, rewards etc. that are associated with the root node.
+    A vertex is created from each of them (possibly with a longer rollout when a compact representation is asked).
+    The stack of vertices is then used to build up the tree further, and these vertices are stacked together and make
+    up the branches of the tree at the root. This process is repeated for a given depth or until the tree cannot be
+    expanded anymore.
 
 .. currentmodule:: torchrl.data
 
@@ -985,11 +1050,13 @@ TorchRL offers a set of classes and functions that can be used to represent tree
 
     BinaryToDecimal
     HashToInt
+    MCTSForest
     QueryModule
     RandomProjectionHash
     SipHash
     TensorDictMap
     TensorMap
+    Tree
 
 
 Reinforcement Learning From Human Feedback (RLHF)

diff --git a/test/test_storage_map.py b/test/test_storage_map.py
@@ -5,13 +5,14 @@
 import argparse
 import functools
 import importlib.util
+from typing import Tuple
 
 import pytest
 
 import torch
 
-from tensordict import TensorDict
-from torchrl.data import LazyTensorStorage, ListStorage
+from tensordict import assert_close, TensorDict
+from torchrl.data import LazyTensorStorage, ListStorage, MCTSForest
 from torchrl.data.map import (
     BinaryToDecimal,
     QueryModule,
@@ -238,6 +239,240 @@ def test_map_rollout(self):
         assert not contains[rollout.shape[-1] :].any()
 
 
+class TestMCTSForest:
+    def dummy_rollouts(self) -> Tuple[TensorDict, ...]:
+        """
+        ├── 0
+        │   ├── 16
+        │   ├── 17
+        │   ├── 18
+        │   ├── 19
+        │   └── 20
+        ├── 1
+        ├── 2
+        ├── 3
+        │   ├── 6
+        │   ├── 7
+        │   ├── 8
+        │   ├── 9
+        │   └── 10
+        ├── 4
+        │   ├── 11
+        │   ├── 12
+        │   ├── 13
+        │   │   ├── 21
+        │   │   ├── 22
+        │   │   ├── 23
+        │   │   ├── 24
+        │   │   └── 25
+        │   ├── 14
+        │   └── 15
+        └── 5
+
+        """
+
+        states0 = torch.arange(6)
+        actions0 = torch.full((5,), 0)
+
+        states1 = torch.cat([torch.tensor([3]), torch.arange(6, 11)])
+        actions1 = torch.full((5,), 1)
+
+        states2 = torch.cat([torch.tensor([4]), torch.arange(11, 16)])
+        actions2 = torch.full((5,), 2)
+
+        states3 = torch.cat([torch.tensor([0]), torch.arange(16, 21)])
+        actions3 = torch.full((5,), 3)
+
+        states4 = torch.cat([torch.tensor([13]), torch.arange(21, 26)])
+        actions4 = torch.full((5,), 4)
+
+        return (
+            self._make_td(states0, actions0),
+            self._make_td(states1, actions1),
+            self._make_td(states2, actions2),
+            self._make_td(states3, actions3),
+            self._make_td(states4, actions4),
+        )
+
+    def _state0(self) -> TensorDict:
+        return self.dummy_rollouts()[0][0]
+
+    @staticmethod
+    def _make_td(state: torch.Tensor, action: torch.Tensor) -> TensorDict:
+        done = torch.zeros_like(action, dtype=torch.bool).unsqueeze(-1)
+        reward = action.clone()
+
+        return TensorDict(
+            {
+                "observation": state[:-1],
+                "action": action,
+                "done": torch.zeros_like(done),
+                "next": {
+                    "observation": state[1:],
+                    "done": done,
+                    "reward": reward,
+                },
+            }
+        ).auto_batch_size_()
+
+    def _make_forest(self) -> MCTSForest:
+        r0, r1, r2, r3, r4 = self.dummy_rollouts()
+        assert r0.shape
+        forest = MCTSForest()
+        forest.extend(r0)
+        forest.extend(r1)
+        forest.extend(r2)
+        forest.extend(r3)
+        forest.extend(r4)
+        return forest
+
+    def _make_forest_intersect(self) -> MCTSForest:
+        """
+        ├── 0
+        │   ├── 16
+        │   ├── 17
+        │   ├── 18
+        │   ├── 19───────│
+        │   │    └── 26  │
+        │   └── 20       │
+        ├── 1            │
+        ├── 2            │
+        ├── 3            │
+        │   ├── 6        │
+        │   ├── 7        │
+        │   ├── 8        │
+        │   ├── 9        │
+        │   └── 10       │
+        ├── 4            │
+        │   ├── 11       │
+        │   ├── 12       │
+        │   ├── 13       │
+        │   │   ├── 21   │
+        │   │   ├── 22   │
+        │   │   ├── 23   │
+        │   │   ├── 24 ──│
+        │   │   └── 25
+        │   ├── 14
+        │   └── 15
+        └── 5
+        """
+        forest = self._make_forest()
+        states5 = torch.cat([torch.tensor([24]), torch.tensor([19, 26])])
+        actions5 = torch.full((2,), 5)
+        rollout5 = self._make_td(states5, actions5)
+        forest.extend(rollout5)
+        return forest
+
+    @staticmethod
+    def make_labels(tree):
+        if tree.rollout is not None:
+            s = torch.cat(
+                [
+                    tree.rollout["observation"][:1],
+                    tree.rollout["next", "observation"],
+                ]
+            )
+            a = tree.rollout["action"].tolist()
+            s = s.tolist()
+            return f"node {tree.node_id}: states {s}, actions {a}"
+        return f"node {tree.node_id}"
+
+    def test_forest_build(self):
+        r0, *_ = self.dummy_rollouts()
+        forest = self._make_forest()
+        tree = forest.get_tree(r0[0])
+        tree.plot(make_labels=self.make_labels)
+
+    def test_forest_vertices(self):
+        r0, *_ = self.dummy_rollouts()
+        forest = self._make_forest()
+
+        tree = forest.get_tree(r0[0])
+        assert tree.num_vertices() == 9  # (0, 20, 3, 10, 4, 13, 25, 15, 5)
+
+        tree = forest.get_tree(r0[0], compact=False)
+        assert tree.num_vertices() == 26
+
+    def test_forest_rebuild_rollout(self):
+        r0, r1, r2, r3, r4 = self.dummy_rollouts()
+        forest = self._make_forest()
+
+        tree = forest.get_tree(r0[0])
+        assert_close(tree.rollout_from_path((0, 0, 0)), r0, intersection=True)
+        assert_close(tree.rollout_from_path((0, 1))[-5:], r1, intersection=True)
+        assert_close(tree.rollout_from_path((0, 0, 1, 0))[-5:], r2, intersection=True)
+        assert_close(tree.rollout_from_path((1,))[-5:], r3, intersection=True)
+        assert_close(tree.rollout_from_path((0, 0, 1, 1))[-5:], r4, intersection=True)
+
+    def test_forest_check_hashes(self):
+        r0, *_ = self.dummy_rollouts()
+        forest = self._make_forest()
+        tree = forest.get_tree(r0[0])
+        nodes = range(tree.num_vertices())
+        hashes = set()
+        for n in nodes:
+            vertex = tree.get_vertex_by_id(n)
+            node_hash = vertex.hash
+            if node_hash is not None:
+                assert isinstance(node_hash, int)
+                hashes.add(node_hash)
+            else:
+                assert vertex is tree
+        assert len(hashes) == tree.num_vertices() - 1
+
+    def test_forest_check_ids(self):
+        r0, *_ = self.dummy_rollouts()
+        forest = self._make_forest()
+        tree = forest.get_tree(r0[0])
+        nodes = range(tree.num_vertices())
+        for n in nodes:
+            vertex = tree.get_vertex_by_id(n)
+            node_id = vertex.node_id
+            assert isinstance(node_id, int)
+            assert node_id == n
+
+    # Ideally, we'd like to have only views but because we index the storage with a tensor
+    #  we actually get regular, single-storage tensors
+    # def test_forest_view(self):
+    #     import tensordict.base
+    #     r0, *_ = self.dummy_rollouts()
+    #     forest = self._make_forest()
+    #     tree = forest.get_tree(r0[0])
+    #     dataptr = set()
+    #     # Check that all tensors point to the same storage (ie, that we only have views)
+    #     for k, v in tree.items(True, True, is_leaf=tensordict.base._NESTED_TENSORS_AS_LISTS):
+    #         if isinstance(k, tuple) and "rollout" in k:
+    #             dataptr.add(v.storage().data_ptr())
+    #             assert len(dataptr) == 1, k
+
+    def test_forest_intersect(self):
+        state0 = self._state0()
+        forest = self._make_forest_intersect()
+        tree = forest.get_tree(state0)
+        subtree = forest.get_tree(TensorDict(observation=19))
+
+        # subtree.plot(make_labels=make_labels)
+        # tree.plot(make_labels=make_labels)
+        assert tree.get_vertex_by_id(2).num_children == 2
+        assert tree.get_vertex_by_id(1).num_children == 2
+        assert tree.get_vertex_by_id(3).num_children == 2
+        assert tree.get_vertex_by_id(8).num_children == 2
+        assert tree.get_vertex_by_id(10).num_children == 2
+        assert tree.get_vertex_by_id(12).num_children == 2
+
+        # Test contains
+        assert subtree in tree
+
+    def test_forest_intersect_vertices(self):
+        state0 = self._state0()
+        forest = self._make_forest_intersect()
+        tree = forest.get_tree(state0)
+        assert len(tree.vertices(key_type="path")) > len(tree.vertices(key_type="hash"))
+        assert len(tree.vertices(key_type="id")) == len(tree.vertices(key_type="hash"))
+        with pytest.raises(ValueError, match="key_type must be"):
+            tree.vertices(key_type="another key type")
+
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
diff --git a/test/test_transforms.py b/test/test_transforms.py
@@ -7098,7 +7098,7 @@ def test_tensordictprimer_batching(self, batched_class, break_when_any_done):
         torch.manual_seed(0)
         env.set_seed(0)
         r1 = env.rollout(100, break_when_any_done=break_when_any_done)
-        tensordict.tensordict.assert_allclose_td(r0, r1)
+        tensordict.assert_close(r0, r1)
 
     def test_callable_default_value(self):
         def create_tensor():

diff --git a/torchrl/data/__init__.py b/torchrl/data/__init__.py
@@ -6,11 +6,13 @@
 from .map import (
     BinaryToDecimal,
     HashToInt,
+    MCTSForest,
     QueryModule,
     RandomProjectionHash,
     SipHash,
     TensorDictMap,
     TensorMap,
+    Tree,
 )
 from .postprocs import MultiStep
 from .replay_buffers import (

diff --git a/torchrl/data/map/__init__.py b/torchrl/data/map/__init__.py
@@ -6,3 +6,4 @@
 from .hash import BinaryToDecimal, RandomProjectionHash, SipHash
 from .query import HashToInt, QueryModule
 from .tdstorage import TensorDictMap, TensorMap
+from .tree import MCTSForest, Tree
diff --git a/torchrl/data/map/hash.py b/torchrl/data/map/hash.py
@@ -7,9 +7,10 @@
 from typing import Callable, List
 
 import torch
+from torch.nn import Module
 
 
-class BinaryToDecimal(torch.nn.Module):
+class BinaryToDecimal(Module):
     """A Module to convert binaries encoded tensors to decimals.
 
     This is a utility class that allow to convert a binary encoding tensor (e.g. `1001`) to
@@ -71,7 +72,7 @@ def forward(self, features: torch.Tensor) -> torch.Tensor:
         return aggregated_digits
 
 
-class SipHash(torch.nn.Module):
+class SipHash(Module):
     """A Module to Compute SipHash values for given tensors.
 
     A hash function module based on SipHash implementation in python.