Commit

Use xmlrunner.XMLTestRunner accordingly in tests/L0/run_test.py (NVIDIA#1451)

* Use xmlrunner.XMLTestRunner accordingly

TODO:
- [x] Remove `subTest` because it's not compatible with the current way
of running L0 tests

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>

* use `torch.testing` more to enable xmlrunner

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>

* Remove `subTest` for xmlrunner

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>

* remove subTest

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>

* do not depend on an env var

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>

* fix syntax errors

* open with `"wb"`

* xml file per dir

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>

* remove commented-out code

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>

* Refactor `TestTransformer`: define member methods (NVIDIA#5)

* use setUpClass to define `test_` methods

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>

* manually define the `test_` methods

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>

* add a missing test

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>

* remove print

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>

* remove file extension

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>

Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>
crcrpar authored and yuanzhedong committed Jul 14, 2023
1 parent 4c17895 commit ad47763
Showing 13 changed files with 941 additions and 950 deletions.
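
The recurring change across the diffs below is the removal of `unittest`'s `subTest` context manager, which the commit message describes as incompatible with how the L0 suite is now run. A minimal sketch of the before/after shape, with made-up names (`ExampleTest`, `_check`) rather than code from this commit:

    import unittest

    import torch


    class ExampleTest(unittest.TestCase):
        def _check(self, contiguous: bool) -> None:
            x = torch.arange(6.0).reshape(2, 3)
            if not contiguous:
                x = x.t()  # non-contiguous view of the same data
            torch.testing.assert_close(x + 0, x)

        # Before: each configuration ran inside subTest, so one test could
        # report several sub-results.
        def test_old_style(self):
            for contiguous in (True, False):
                with self.subTest(contiguous=contiguous):
                    self._check(contiguous)

        # After: the helper is called directly; the first failing
        # configuration fails the whole test, keeping plain unittest
        # semantics that the XML runner can report.
        def test_new_style(self):
            for contiguous in (True, False):
                self._check(contiguous)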
43 changes: 23 additions & 20 deletions tests/L0/run_fused_layer_norm/test_fused_layer_norm.py
@@ -3,7 +3,10 @@

import torch

import apex
from apex.normalization import FusedLayerNorm
from apex.normalization import FusedRMSNorm
from apex.normalization import MixedFusedLayerNorm
from apex.normalization import MixedFusedRMSNorm


class TestFusedLayerNorm(unittest.TestCase):
@@ -18,15 +21,15 @@ class TestFusedLayerNorm(unittest.TestCase):
def setUp(self):
# bias and weight are set to 0 and 1 respectively, so no need to copy parameters from cpu module to the gpu one
if not self.mixed_fused:
self.module_cpu_ = apex.normalization.FusedLayerNorm(
self.module_cpu_ = FusedLayerNorm(
normalized_shape=self.normalized_shape, elementwise_affine=self.elementwise_affine).cpu()
self.module_cuda_ = apex.normalization.FusedLayerNorm(
self.module_cuda_ = FusedLayerNorm(
normalized_shape=self.normalized_shape, elementwise_affine=self.elementwise_affine).to(device="cuda", dtype=self.dtype)
else:
assert self.elementwise_affine
self.module_cpu_ = apex.normalization.MixedFusedLayerNorm(
self.module_cpu_ = MixedFusedLayerNorm(
normalized_shape=self.normalized_shape).cpu()
self.module_cuda_ = apex.normalization.MixedFusedLayerNorm(
self.module_cuda_ = MixedFusedLayerNorm(
normalized_shape=self.normalized_shape).to(device="cuda", dtype=self.dtype)


@@ -65,8 +68,7 @@ def _check_same_output(self, batch_size, contiguous):

def _test_same_output(self, batch_size):
for contiguous in (True, False):
with self.subTest(contiguous=contiguous):
self._check_same_output(batch_size, contiguous)
self._check_same_output(batch_size, contiguous)

def test_layer_norm(self):
self._test_same_output(16)
@@ -87,15 +89,15 @@ class TestFusedRMSNorm(unittest.TestCase):
def setUp(self):
# bias and weight are set to 0 and 1 respectively, so no need to copy parameters from cpu module to the gpu one
if not self.mixed_fused:
self.module_cpu_ = apex.normalization.FusedRMSNorm(
self.module_cpu_ = FusedRMSNorm(
normalized_shape=self.normalized_shape, elementwise_affine=self.elementwise_affine).cpu()
self.module_cuda_ = apex.normalization.FusedRMSNorm(
self.module_cuda_ = FusedRMSNorm(
normalized_shape=self.normalized_shape, elementwise_affine=self.elementwise_affine).to(device="cuda", dtype=self.dtype)
else:
assert self.elementwise_affine
self.module_cpu_ = apex.normalization.MixedFusedRMSNorm(
self.module_cpu_ = MixedFusedRMSNorm(
normalized_shape=self.normalized_shape).cpu()
self.module_cuda_ = apex.normalization.MixedFusedRMSNorm(
self.module_cuda_ = MixedFusedRMSNorm(
normalized_shape=self.normalized_shape).to(device="cuda", dtype=self.dtype)

def _check_same_output(self, batch_size, contiguous):
@@ -136,8 +138,7 @@ def _check_same_output(self, batch_size, contiguous):

def _test_same_output(self, batch_size):
for contiguous in (True, False):
with self.subTest(contiguous=contiguous):
self._check_same_output(batch_size, contiguous)
self._check_same_output(batch_size, contiguous)

def test_layer_norm(self):
self._test_same_output(16)
@@ -204,17 +205,17 @@ def _prep_layers(normalized_shape, elementwise_affine, dtype):
native = torch.nn.LayerNorm(
normalized_shape=normalized_shape, elementwise_affine=elementwise_affine
).to(device="cuda", dtype=dtype)
fused = apex.normalization.FusedLayerNorm(
fused = FusedLayerNorm(
normalized_shape=normalized_shape, elementwise_affine=elementwise_affine
).cuda()
return native, fused


def _prep_rms_layers(normalized_shape, elementwise_affine, dtype):
native = apex.normalization.FusedRMSNorm(
native = FusedRMSNorm(
normalized_shape=normalized_shape, elementwise_affine=elementwise_affine
)
fused = apex.normalization.FusedRMSNorm(
fused = FusedRMSNorm(
normalized_shape=normalized_shape, elementwise_affine=elementwise_affine
).cuda()
return native, fused
@@ -259,8 +260,7 @@ def _run_test(self, dtype, elementwise_affine):

def test_autocast(self):
for (dtype, elementwise_affine) in itertools.product(autocast_dtypes, (True, False)):
with self.subTest(f"{dtype}-{elementwise_affine}"):
self._run_test(dtype, elementwise_affine)
self._run_test(dtype, elementwise_affine)

class TestAutocastFusedRMSNorm(unittest.TestCase):
bf16_fwd_thresholds = dict(rtol=1.6e-2, atol=3e-4)
@@ -291,5 +291,8 @@ def _run_test(self, dtype, elementwise_affine):

def test_autocast(self):
for (dtype, elementwise_affine) in itertools.product(autocast_dtypes, (True, False)):
with self.subTest(f"{dtype}-{elementwise_affine}"):
self._run_test(dtype, elementwise_affine)
self._run_test(dtype, elementwise_affine)


if __name__ == "__main__":
unittest.main()
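
The autocast tests above keep per-dtype tolerance dictionaries such as `bf16_fwd_thresholds = dict(rtol=1.6e-2, atol=3e-4)`, in line with the commit's move toward `torch.testing`. A small sketch of how such a dictionary is typically unpacked into `torch.testing.assert_close`; the tensors here are illustrative, not taken from the suite:

    import torch

    # Illustrative threshold dict mirroring the bf16 forward tolerances above.
    bf16_fwd_thresholds = dict(rtol=1.6e-2, atol=3e-4)

    expected = torch.randn(8, 16)
    actual = expected.to(torch.bfloat16).float()  # emulate a lower-precision path

    # assert_close takes rtol/atol keyword arguments, so the dict can be
    # unpacked straight into the call.
    torch.testing.assert_close(actual, expected, **bf16_fwd_thresholds)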
142 changes: 61 additions & 81 deletions tests/L0/run_mlp/test_mlp.py
@@ -1,9 +1,12 @@
"""Tests for c++ MLP"""
import unittest
from itertools import product
from time import time

import torch
from torch import nn
from torch.testing._internal import common_utils
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_device_type import onlyCUDA

from apex.mlp import MLP

@@ -14,7 +17,7 @@


# note(crcrpar): On Ampere, this test should be run without TF32 enabled.
class TestMLP(unittest.TestCase):
class TestMLP(common_utils.TestCase):
def test_creation(self):
MLP(mlp_sizes)

@@ -24,10 +27,51 @@ def test_numeric(self):
mlp_layers = []
for i in range(mlp.num_layers):
linear = nn.Linear(mlp_sizes[i], mlp_sizes[i + 1])
mlp.weights[i].data.copy_(linear.weight)
mlp.biases[i].data.copy_(linear.bias)
with torch.no_grad():
mlp.weights[i].copy_(linear.weight)
mlp.biases[i].copy_(linear.bias)
mlp_layers.append(linear)
mlp_layers.append(nn.ReLU(inplace=True))
mlp_layers.append(nn.ReLU())

ref_mlp = nn.Sequential(*mlp_layers).cuda()

test_input = (
torch.empty(batch_size, mlp_sizes[0], device="cuda")
.uniform_(-1.0, 1.0)
.requires_grad_()
)
ref_input = test_input.clone().detach().requires_grad_()
mlp_out = mlp(test_input)
ref_out = ref_mlp(ref_input)
self.assertEqual(mlp_out, ref_out)

# Use mean value as scalar loss. Multiply 10 to make it big enough not zero out
mlp_out.mean().mul(10.0).backward()
ref_out.mean().mul(10.0).backward()
self.assertEqual(test_input.grad, ref_input.grad)
self.assertEqual(mlp.biases[0].grad, ref_mlp[0].bias.grad)

@common_utils.parametrize(
"use_activation,bias",
list(product(("none", "relu", "sigmoid"), (True, False))),
)
def test_mlp(self, use_activation: str, bias: bool):
# for use_activation in ["none", "relu", "sigmoid"]:
msg = f"activation: {use_activation}, bias: {bias}"
mlp = MLP(mlp_sizes, bias=bias, activation=use_activation).cuda()

mlp_layers = []
for i in range(mlp.num_layers):
linear = nn.Linear(mlp_sizes[i], mlp_sizes[i + 1], bias=bias)
with torch.no_grad():
mlp.weights[i].copy_(linear.weight)
if bias:
mlp.biases[i].copy_(linear.bias)
mlp_layers.append(linear)
if use_activation == "relu":
mlp_layers.append(nn.ReLU())
if use_activation == "sigmoid":
mlp_layers.append(nn.Sigmoid())

ref_mlp = nn.Sequential(*mlp_layers).cuda()

Expand All @@ -39,90 +83,23 @@ def test_numeric(self):
ref_input = test_input.clone().detach().requires_grad_()
mlp_out = mlp(test_input)
ref_out = ref_mlp(ref_input)
torch.testing.assert_close(mlp_out, ref_out)
self.assertEqual(mlp_out, ref_out, msg=msg)

# Use mean value as scalar loss. Multiply 10 to make it big enough not zero out
mlp_out.mean().mul(10.0).backward()
ref_out.mean().mul(10.0).backward()
torch.testing.assert_close(test_input.grad, ref_input.grad)
torch.testing.assert_close(mlp.biases[0].grad, ref_mlp[0].bias.grad)

def test_no_bias(self):
for use_activation in ["none", "relu", "sigmoid"]:
with self.subTest(use_activation=use_activation):
mlp = MLP(mlp_sizes, bias=False, activation=use_activation).cuda()

mlp_layers = []
for i in range(mlp.num_layers):
linear = nn.Linear(mlp_sizes[i], mlp_sizes[i + 1], bias=False)
mlp.weights[i].data.copy_(linear.weight)
mlp_layers.append(linear)
if use_activation == "relu":
mlp_layers.append(nn.ReLU(inplace=True))
if use_activation == "sigmoid":
mlp_layers.append(nn.Sigmoid())

ref_mlp = nn.Sequential(*mlp_layers).cuda()

test_input = (
torch.empty(batch_size, mlp_sizes[0], device="cuda")
.uniform_(-1.0, 1.0)
.requires_grad_()
)
ref_input = test_input.clone().detach().requires_grad_()
mlp_out = mlp(test_input)
ref_out = ref_mlp(ref_input)
torch.testing.assert_close(mlp_out, ref_out)

# Use mean value as scalar loss. Multiply 10 to make it big enough not zero out
mlp_out.mean().mul(10.0).backward()
ref_out.mean().mul(10.0).backward()
torch.testing.assert_close(test_input.grad, ref_input.grad)
torch.testing.assert_close(mlp.weights[0].grad, ref_mlp[0].weight.grad)

def test_with_bias(self):
for use_activation in ["none", "relu", "sigmoid"]:
with self.subTest(use_activation=use_activation):
mlp = MLP(mlp_sizes, bias=True, activation=use_activation).cuda()

mlp_layers = []
for i in range(mlp.num_layers):
linear = nn.Linear(mlp_sizes[i], mlp_sizes[i + 1], bias=True)
mlp.weights[i].data.copy_(linear.weight)
mlp.biases[i].data.copy_(linear.bias)
mlp_layers.append(linear)
if use_activation == "relu":
mlp_layers.append(nn.ReLU(inplace=True))
if use_activation == "sigmoid":
mlp_layers.append(nn.Sigmoid())

ref_mlp = nn.Sequential(*mlp_layers).cuda()

test_input = (
torch.empty(batch_size, mlp_sizes[0], device="cuda")
.uniform_(-1.0, 1.0)
.requires_grad_()
)
ref_input = test_input.clone().detach().requires_grad_()
mlp_out = mlp(test_input)
ref_out = ref_mlp(ref_input)
torch.testing.assert_close(mlp_out, ref_out)

# Use mean value as scalar loss. Multiply 10 to make it big enough not zero out
mlp_out.mean().mul(10.0).backward()
ref_out.mean().mul(10.0).backward()
torch.testing.assert_close(test_input.grad, ref_input.grad)
torch.testing.assert_close(mlp.weights[0].grad, ref_mlp[0].weight.grad)
torch.testing.assert_close(mlp.biases[0].grad, ref_mlp[0].bias.grad)
self.assertEqual(test_input.grad, ref_input.grad, msg=msg)
self.assertEqual(mlp.weights[0].grad, ref_mlp[0].weight.grad, msg=msg)

def test_no_grad(self):
mlp = MLP(mlp_sizes).cuda()

mlp_layers = []
for i in range(mlp.num_layers):
linear = nn.Linear(mlp_sizes[i], mlp_sizes[i + 1])
mlp.weights[i].data.copy_(linear.weight)
mlp.biases[i].data.copy_(linear.bias)
with torch.no_grad():
mlp.weights[i].copy_(linear.weight)
mlp.biases[i].copy_(linear.bias)
mlp_layers.append(linear)
mlp_layers.append(nn.ReLU(inplace=True))

Expand All @@ -132,12 +109,12 @@ def test_no_grad(self):
ref_input = test_input.clone().detach()
mlp_out = mlp(test_input)
ref_out = ref_mlp(ref_input)
torch.testing.assert_close(mlp_out, ref_out)
self.assertEqual(mlp_out, ref_out)

# Use mean value as scalar loss. Multiply 10 to make it big enough not zero out
mlp_out.mean().mul(10.0).backward()
ref_out.mean().mul(10.0).backward()
torch.testing.assert_close(mlp.weights[0].grad, ref_mlp[0].weight.grad)
self.assertEqual(mlp.weights[0].grad, ref_mlp[0].weight.grad)

def test_performance_half(self):
mlp = MLP(mlp_sizes).cuda().half()
@@ -206,5 +183,8 @@ def test_performance_half(self):
)


instantiate_device_type_tests(TestMLP, globals(), only_for=("cuda",))


if __name__ == "__main__":
unittest.main()
common_utils.run_tests()
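
test_mlp.py now builds on PyTorch's internal test utilities: `common_utils.parametrize` expands each `(use_activation, bias)` pair into its own named test, `instantiate_device_type_tests(..., only_for=("cuda",))` generates the CUDA-bound class, and `common_utils.run_tests()` replaces `unittest.main()`. A stripped-down sketch of that skeleton with a placeholder test body (the `TestExample` class and its contents are invented for illustration):

    from itertools import product

    import torch
    from torch.testing._internal import common_utils
    from torch.testing._internal.common_device_type import instantiate_device_type_tests


    class TestExample(common_utils.TestCase):
        # Each (use_activation, bias) pair becomes a separately named test,
        # so an XML report lists every configuration on its own line instead
        # of hiding it inside subTest.
        @common_utils.parametrize(
            "use_activation,bias",
            list(product(("none", "relu", "sigmoid"), (True, False))),
        )
        def test_combo(self, device, use_activation: str, bias: bool):
            layer = torch.nn.Linear(3, 3, bias=bias).to(device)
            out = layer(torch.ones(2, 3, device=device))
            self.assertEqual(out.shape, torch.Size([2, 3]))


    # only_for restricts instantiation to CUDA, mirroring the call above; the
    # generated class (e.g. TestExampleCUDA) is placed into globals() so the
    # loader can discover it.
    instantiate_device_type_tests(TestExample, globals(), only_for=("cuda",))


    if __name__ == "__main__":
        common_utils.run_tests()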
29 changes: 25 additions & 4 deletions tests/L0/run_test.py
@@ -2,7 +2,9 @@
How to run this script?
1. Run all the tests: `python /path/to/apex/tests/L0/run_test.py`
1. Run all the tests: `python /path/to/apex/tests/L0/run_test.py` If you want an xml report,
pass `--xml-report`, i.e. `python /path/to/apex/tests/L0/run_test.py --xml-report` and
the file is created in `/path/to/apex/tests/L0`.
2. Run one of the tests (e.g. fused layer norm):
`python /path/to/apex/tests/L0/run_test.py --include run_fused_layer_norm`
3. Run two or more of the tests (e.g. optimizers and fused layer norm):
@@ -43,16 +45,35 @@ def parse_args():
default=DEFAULT_TEST_DIRS,
help="select a set of tests to run (defaults to ALL tests).",
)
parser.add_argument(
"--xml-report",
action="store_true",
help="pass this argument to get a junit xml report. (requires `xmlrunner`)",
)
args, _ = parser.parse_known_args()
return args


def main(args):
runner = unittest.TextTestRunner(verbosity=2)
def main(args: argparse.Namespace) -> None:
test_runner_kwargs = {"verbosity": 2}
Runner = unittest.TextTestRunner
if args.xml_report:
import xmlrunner
from datetime import date # NOQA
Runner = xmlrunner.XMLTestRunner

errcode = 0
for test_dir in args.include:
if args.xml_report:
this_dir = os.path.abspath(os.path.dirname(__file__))
xml_output = os.path.join(
this_dir,
f"""TEST_{test_dir}_{date.today().strftime("%y%m%d")}""",
)
test_runner_kwargs["output"] = xml_output

runner = Runner(**test_runner_kwargs)
test_dir = os.path.join(TEST_ROOT, test_dir)
print(test_dir)
suite = unittest.TestLoader().discover(test_dir)

print("\nExecuting tests from " + test_dir)
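
The runner swap above boils down to the pattern below, sketched here with a hard-coded test directory and report name. The `xmlrunner` module comes from the unittest-xml-reporting package; `output` may be a directory path (as used here) into which JUnit-style XML files are written, or a file object opened in binary mode, which is what the "open with `"wb"`" bullet in the commit message refers to.

    import os
    import unittest

    import xmlrunner  # provided by the unittest-xml-reporting package

    # Discover one L0 test directory and hand the suite to XMLTestRunner
    # instead of unittest.TextTestRunner; the report directory name mirrors
    # the TEST_<dir>_<yymmdd> scheme used above (e.g. for Jul 14, 2023).
    test_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "run_fused_layer_norm")
    suite = unittest.TestLoader().discover(test_dir)

    runner = xmlrunner.XMLTestRunner(output="TEST_run_fused_layer_norm_230714", verbosity=2)
    runner.run(suite)

With the new flag, the equivalent invocation is `python tests/L0/run_test.py --xml-report --include run_fused_layer_norm`, and the report lands next to run_test.py as described in the updated docstring.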