[Minor] More accurate code coverage for reversible (#182)
blefaudeux authored Jan 18, 2022
1 parent 04bb6c1 commit 60e94e5
Showing 2 changed files with 11 additions and 6 deletions.
7 changes: 4 additions & 3 deletions tests/test_reversible.py
@@ -140,7 +140,7 @@ def test_reversible_no_alternate(device):
     _ = xFormer.from_config(xFormerConfig([rev, non_rev])).to(device)


-@pytest.mark.parametrize("config", [_test_configs[1]])
+@pytest.mark.parametrize("config", _test_configs)
 @pytest.mark.parametrize("device", DEVICES)
 def test_reversible_train(config, device):
     torch.manual_seed(0)
@@ -212,5 +212,6 @@ def evaluate(model: torch.nn.Module):
     # Arbitrary threshold
     eval_stop_rev = evaluate(model_reversible)
     eval_stop_non_rev = evaluate(model_non_reversible)
-    assert eval_start_rev / eval_stop_rev > 3
-    assert eval_start_non_rev / eval_stop_non_rev > 3
+    if len(config) < 2:  # only check the encoder case
+        assert eval_start_rev / eval_stop_rev > 3
+        assert eval_start_non_rev / eval_stop_non_rev > 3
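The gist of this change: test_reversible_train now runs over every entry of _test_configs instead of a single one, and the convergence assertion applies only to encoder-only stacks. A minimal, self-contained sketch of that gating pattern; the config contents below are hypothetical placeholders, not the real xformers configs:

import pytest

# Hypothetical placeholders: a one-element config is an encoder-only
# stack, a two-element config is an encoder + decoder stack.
_test_configs = [
    [{"block_type": "encoder"}],
    [{"block_type": "encoder"}, {"block_type": "decoder"}],
]


@pytest.mark.parametrize("config", _test_configs)
def test_convergence_gate(config):
    eval_start, eval_stop = 9.0, 2.0  # stand-in loss values
    if len(config) < 2:  # only check the encoder case, as in the diff above
        assert eval_start / eval_stop > 3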
10 changes: 7 additions & 3 deletions xformers/components/reversible.py
@@ -22,7 +22,7 @@ class Deterministic(nn.Module):
     def __init__(self, net: nn.Module):
         super().__init__()
         self.net = net
-        self.cpu_state: torch.Tensor
+        self.cpu_state: torch.Tensor = torch.get_rng_state()
         self.cuda_in_fwd: bool = False
         self.gpu_devices: List[int] = []
         self.gpu_states: List[torch.Tensor] = []
@@ -68,7 +68,9 @@ def forward(self, x: torch.Tensor, f_args={}, g_args={}):

         return torch.cat([y1, y2], dim=self.split_dim)

-    def backward_pass(self, y: torch.Tensor, dy: torch.Tensor, f_args={}, g_args={}):
+    def backward_pass(
+        self, y: torch.Tensor, dy: torch.Tensor, f_args={}, g_args={}
+    ):  # pragma: no cover  # this is covered, but called directly from C++
         y1, y2 = torch.chunk(y, 2, dim=self.split_dim)
         del y

@@ -118,7 +120,9 @@ def forward(ctx, x, blocks, kwargs):
         return x

     @staticmethod
-    def backward(ctx, dy):
+    def backward(
+        ctx, dy
+    ):  # pragma: no cover  # this is covered, but called directly from C++
         y = ctx.y
         kwargs = ctx.kwargs
         for block in ctx.blocks[::-1]:
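For context on the cpu_state change: Deterministic snapshots the RNG state when the block runs forward and restores it when activations are recomputed during the backward pass, so stochastic ops such as dropout replay identically. Initializing cpu_state eagerly means the attribute always holds a tensor rather than remaining a bare annotation until the first recording. A condensed sketch of that record-and-replay pattern, using a hypothetical RNGReplay wrapper rather than the verbatim xformers source:

from typing import List

import torch
import torch.nn as nn
from torch.utils.checkpoint import get_device_states, set_device_states


class RNGReplay(nn.Module):
    # Hypothetical wrapper mirroring the Deterministic pattern.
    def __init__(self, net: nn.Module):
        super().__init__()
        self.net = net
        # Eager init: the attribute is always a real tensor.
        self.cpu_state: torch.Tensor = torch.get_rng_state()
        self.cuda_in_fwd: bool = False
        self.gpu_devices: List[int] = []
        self.gpu_states: List[torch.Tensor] = []

    def record(self, *args):
        # Snapshot the CPU (and, if CUDA tensors are involved,
        # per-device GPU) RNG state at forward time.
        self.cpu_state = torch.get_rng_state()
        if torch.cuda.is_available():
            self.cuda_in_fwd = True
            self.gpu_devices, self.gpu_states = get_device_states(*args)

    def replay(self, *args, **kwargs):
        # Re-run the wrapped module under the recorded RNG state so that
        # dropout masks match the original forward pass.
        devices = self.gpu_devices if self.cuda_in_fwd else []
        with torch.random.fork_rng(devices=devices):
            torch.set_rng_state(self.cpu_state)
            if self.cuda_in_fwd:
                set_device_states(self.gpu_devices, self.gpu_states)
            return self.net(*args, **kwargs)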

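As for why the backward paths get # pragma: no cover (the point of the commit title): they do execute, but PyTorch's autograd engine invokes them from C++ on a worker thread where coverage.py's trace function is typically not installed, so the lines would otherwise be reported as unexecuted. A minimal illustration with a hypothetical autograd Function; the pragma itself is the standard coverage.py exclusion marker:

import torch


class ScaleTwice(torch.autograd.Function):
    # Hypothetical example of a custom autograd Function.
    @staticmethod
    def forward(ctx, x):
        return x * 2

    @staticmethod
    def backward(ctx, dy):  # pragma: no cover  # runs, but invoked from C++
        return dy * 2


# backward() executes here, yet the Python tracer never observes the call,
# so coverage.py would count those lines as missed without the pragma.
y = ScaleTwice.apply(torch.ones(3, requires_grad=True))
y.sum().backward()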