From 36545132e30073b2b78668265e9a0b89898892f8 Mon Sep 17 00:00:00 2001 From: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com> Date: Wed, 29 Jun 2022 18:22:05 +0800 Subject: [PATCH] optimize ci speed in expensive eager test (#8504) Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Shenghang Tsai --- .../test/expensive/test_convtranspose.py | 12 +-- python/oneflow/test/expensive/test_einsum.py | 100 +++++++++--------- .../test/expensive/test_sqrt_square_sum.py | 13 ++- .../torch_flow_dual_object.py | 6 ++ 4 files changed, 68 insertions(+), 63 deletions(-) diff --git a/python/oneflow/test/expensive/test_convtranspose.py b/python/oneflow/test/expensive/test_convtranspose.py index fe5ce95b835..62736c171cf 100644 --- a/python/oneflow/test/expensive/test_convtranspose.py +++ b/python/oneflow/test/expensive/test_convtranspose.py @@ -278,7 +278,7 @@ def test_ConvTranspose1d(test_case): for arg in GenArgList(arg_dict): arg[0](test_case, *arg[1:]) - @autotest() + @autotest(n=5) def test_ConvTranspose1d_(test_case): channels = random(1, 6) m = torch.nn.ConvTranspose1d( @@ -299,7 +299,7 @@ def test_ConvTranspose1d_(test_case): return y @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") - @autotest(n=30) + @autotest(n=5) def test_deconv1d_group_with_random_data(test_case): channels = 720 # lcm(1, 2, 3, 4, 5, 6) m = torch.nn.ConvTranspose1d( @@ -322,7 +322,7 @@ def test_deconv1d_group_with_random_data(test_case): y = m(x) return y - @autotest() + @autotest(n=5) def test_ConvTranspose3d_(test_case): channels = random(1, 2) m = torch.nn.ConvTranspose3d( @@ -343,9 +343,9 @@ def test_ConvTranspose3d_(test_case): return y @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") - @autotest(n=15) + @autotest(n=5) def test_deconv3d_group_with_random_data(test_case): - channels = 720 # lcm(1, 2, 3, 4, 5, 6) + channels = 120 # lcm(1, 2, 3, 4, 5) m = torch.nn.ConvTranspose3d( in_channels=channels, out_channels=channels, @@ -353,7 +353,7 @@ def test_deconv3d_group_with_random_data(test_case): stride=random() | nothing(), padding=random(1, 3).to(int) | nothing(), dilation=random(1, 5) | nothing(), - groups=random(1, 7), + groups=random(1, 6), padding_mode=constant("zeros") | nothing(), ) m.train(random()) diff --git a/python/oneflow/test/expensive/test_einsum.py b/python/oneflow/test/expensive/test_einsum.py index 716f2a378b2..2cfc9a00273 100644 --- a/python/oneflow/test/expensive/test_einsum.py +++ b/python/oneflow/test/expensive/test_einsum.py @@ -22,14 +22,14 @@ @flow.unittest.skip_unless_1n1d() class TestEinsum(flow.unittest.TestCase): - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_matrix_transpose(test_case): device = random_device() x = random_tensor(ndim=2, dim0=random(1, 6), dim1=random(1, 6),).to(device) z = torch.einsum("ij->ji", x) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_eltwise_multiply(test_case): device = random_device() dim0 = random(1, 6) @@ -39,7 +39,7 @@ def test_einsum_eltwise_multiply(test_case): z = torch.einsum("ij,ij->ij", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_get_diagonal(test_case): device = random_device() dim = random(1, 6) @@ -47,7 +47,7 @@ def test_einsum_get_diagonal(test_case): z = torch.einsum("ii->i", x) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_batch_permute(test_case): device = random_device() x = random_tensor( @@ -61,21 +61,21 @@ def test_einsum_batch_permute(test_case): z = torch.einsum("...ij->...ji", x) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_reduce_sum(test_case): device = random_device() x = random_tensor(ndim=2, dim0=random(1, 6), dim1=random(1, 6),).to(device) z = torch.einsum("ij->", x) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_matrix_column_sum(test_case): device = random_device() x = random_tensor(ndim=2, dim0=random(1, 6), dim1=random(1, 6),).to(device) z = torch.einsum("ij->j", x) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_matrix_vector_multiply(test_case): device = random_device() dim0 = random(1, 6) @@ -86,7 +86,7 @@ def test_einsum_matrix_vector_multiply(test_case): z = torch.einsum("ik,k", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_matmul(test_case): device = random_device() dim0 = random(1, 6) @@ -98,7 +98,7 @@ def test_einsum_matmul(test_case): z = torch.einsum("ik,kj", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_vector_inner_product(test_case): device = random_device() dim0 = random(1, 6) @@ -108,7 +108,7 @@ def test_einsum_vector_inner_product(test_case): z = torch.einsum("i,i", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_eltwise_mul_then_reduce_sum(test_case): device = random_device() dim0 = random(1, 6) @@ -119,7 +119,7 @@ def test_einsum_eltwise_mul_then_reduce_sum(test_case): z = torch.einsum("ij,ij", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_vector_outer_product(test_case): device = random_device() x = random_tensor(ndim=1, dim0=random(1, 6),).to(device) @@ -128,7 +128,7 @@ def test_einsum_vector_outer_product(test_case): z = torch.einsum("i,j", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_batch_matmul(test_case): device = random_device() dim0 = random(1, 6) @@ -138,7 +138,7 @@ def test_einsum_batch_matmul(test_case): z = torch.einsum("ijk,ikl->ijl", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_tensor_contraction(test_case): device = random_device() dim0 = random(1, 6) @@ -157,7 +157,7 @@ def test_einsum_tensor_contraction(test_case): z = torch.einsum("pqrs,tuqvr->pstuv", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_bilinear_transformation(test_case): device = random_device() dim0 = random(1, 6) @@ -178,7 +178,7 @@ def test_einsum_0_size_tensor(test_case): z = torch.einsum("ijk", x) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_tensor_contraction2(test_case): device = random_device() dim0 = random(1, 6) @@ -189,7 +189,7 @@ def test_einsum_tensor_contraction2(test_case): z = torch.einsum("b n h w, n d -> b d h w", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_eltwise_mul_sum_row(test_case): device = random_device() dim0 = random(1, 6) @@ -199,7 +199,7 @@ def test_einsum_eltwise_mul_sum_row(test_case): z = torch.einsum("n d, n d -> n", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_matmul2(test_case): device = random_device() dim0 = random(1, 6) @@ -208,7 +208,7 @@ def test_einsum_matmul2(test_case): z = torch.einsum("i d, j d -> i j", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_attention(test_case): device = random_device() dim0 = random(1, 6) @@ -223,7 +223,7 @@ def test_einsum_attention(test_case): z = torch.einsum("b h i d, b h j d -> b h i j", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_batch_matmul2(test_case): device = random_device() dim0 = random(1, 6) @@ -238,7 +238,7 @@ def test_einsum_batch_matmul2(test_case): z = torch.einsum("b h i j, b h j d -> b h i d", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_batch_matrix_vector_multiply(test_case): device = random_device() dim0 = random(1, 6) @@ -251,7 +251,7 @@ def test_einsum_batch_matrix_vector_multiply(test_case): z = torch.einsum("b i d, b i j d -> b i j", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_batch_matmul3(test_case): device = random_device() dim0 = random(1, 6) @@ -263,7 +263,7 @@ def test_einsum_batch_matmul3(test_case): z = torch.einsum("b x i d, b j d -> b x i j", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_batch_matmul4(test_case): device = random_device() dim0 = random(1, 6) @@ -275,7 +275,7 @@ def test_einsum_batch_matmul4(test_case): z = torch.einsum("b x i j, b j d -> b x i d", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_alphaflod_usecase1(test_case): device = random_device() dim0 = random(1, 6) @@ -285,7 +285,7 @@ def test_einsum_alphaflod_usecase1(test_case): z = torch.einsum("hij, ijc->ihc", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_alphaflod_usecase2(test_case): device = random_device() dim0 = random(1, 6) @@ -295,7 +295,7 @@ def test_einsum_alphaflod_usecase2(test_case): z = torch.einsum("rac,rab->rbc", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_alphaflod_usecase3(test_case): device = random_device() dim0 = random(1, 6) @@ -305,7 +305,7 @@ def test_einsum_alphaflod_usecase3(test_case): z = torch.einsum("ra,rab->rb", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_alphaflod_usecase4(test_case): device = random_device() dim0 = random(1, 6) @@ -315,7 +315,7 @@ def test_einsum_alphaflod_usecase4(test_case): z = torch.einsum("qhc,khc->qkh", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_alphaflod_usecase5(test_case): device = random_device() dim0 = random(1, 6) @@ -326,7 +326,7 @@ def test_einsum_alphaflod_usecase5(test_case): z = torch.einsum("nm, mrc->nrc", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_alphaflod_usecase6(test_case): device = random_device() dim0 = random(1, 6) @@ -336,7 +336,7 @@ def test_einsum_alphaflod_usecase6(test_case): z = torch.einsum("abc,adc->bdc", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_alphaflod_usecase7(test_case): device = random_device() dim0 = random(1, 6) @@ -348,7 +348,7 @@ def test_einsum_alphaflod_usecase7(test_case): z = torch.einsum("dceb,cef->dbf", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_alphaflod_usecase8(test_case): device = random_device() dim0 = random(1, 6) @@ -361,7 +361,7 @@ def test_einsum_alphaflod_usecase8(test_case): z = torch.einsum("acb,ade->dceb", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_alphaflod_usecase9(test_case): device = random_device() dim0 = random(1, 6) @@ -372,7 +372,7 @@ def test_einsum_alphaflod_usecase9(test_case): z = torch.einsum("qkc,ch->hqk", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_alphaflod_usecase10(test_case): device = random_device() dim0 = random(1, 6) @@ -387,7 +387,7 @@ def test_einsum_alphaflod_usecase10(test_case): z = torch.einsum("bhqk,bkhc->bqhc", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_alphaflod_usecase11(test_case): device = random_device() dim0 = random(1, 6) @@ -400,7 +400,7 @@ def test_einsum_alphaflod_usecase11(test_case): z = torch.einsum("bqa,ahc->bqhc", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_ellipsis_usecase1(test_case): device = random_device() dim0 = random(1, 6) @@ -413,7 +413,7 @@ def test_einsum_ellipsis_usecase1(test_case): z = torch.einsum("...lc, ...c -> ...l", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_ellipsis_usecase2(test_case): device = random_device() dim0 = random(1, 6) @@ -423,7 +423,7 @@ def test_einsum_ellipsis_usecase2(test_case): z = torch.einsum("...lc, ...lc -> ...l", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_ellipsis_usecase3(test_case): device = random_device() dim0 = random(1, 6) @@ -436,7 +436,7 @@ def test_einsum_ellipsis_usecase3(test_case): z = torch.einsum("...id,...jd->...ij", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_ellipsis_usecase4(test_case): device = random_device() dim0 = random(1, 6) @@ -448,7 +448,7 @@ def test_einsum_ellipsis_usecase4(test_case): z = torch.einsum("...klm,kmn->...kln", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_ellipsis_usecase5(test_case): device = random_device() dim0 = random(1, 6) @@ -461,7 +461,7 @@ def test_einsum_ellipsis_usecase5(test_case): z = torch.einsum("...ikl, ...jk -> ...ijl", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_ellipsis_usecase6(test_case): device = random_device() dim0 = random(1, 6) @@ -474,7 +474,7 @@ def test_einsum_ellipsis_usecase6(test_case): z = torch.einsum("...l,...l->...", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_ellipsis_usecase7(test_case): device = random_device() dim0 = random(1, 6) @@ -487,7 +487,7 @@ def test_einsum_ellipsis_usecase7(test_case): z = torch.einsum("ijk,ijk...->ij...", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_other_usecase1(test_case): device = random_device() dim0 = random(1, 6) @@ -499,7 +499,7 @@ def test_einsum_other_usecase1(test_case): z = torch.einsum("bxi,oij,byj->boxy", x, y, w) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_other_usecase2(test_case): device = random_device() dim0 = random(1, 6) @@ -513,7 +513,7 @@ def test_einsum_other_usecase2(test_case): z = torch.einsum("ijac,ijkp->ijakcp", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_other_usecase3(test_case): device = random_device() dim0 = random(1, 6) @@ -525,7 +525,7 @@ def test_einsum_other_usecase3(test_case): z = torch.einsum("cdij,cbi->cdbj", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_fastfold_usecase1(test_case): device = random_device() dim0 = random(1, 6) @@ -540,7 +540,7 @@ def test_einsum_fastfold_usecase1(test_case): z = torch.einsum("bsid,bsjd->bijd", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_fastfold_usecase2(test_case): device = random_device() dim0 = random(1, 6) @@ -554,7 +554,7 @@ def test_einsum_fastfold_usecase2(test_case): z = torch.einsum("bsid,bsje->bijde", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_openfold_usecase1(test_case): device = random_device() dim0 = random(1, 6) @@ -567,7 +567,7 @@ def test_einsum_openfold_usecase1(test_case): z = torch.einsum("...bac,...dae->...bdce", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_openfold_usecase2(test_case): device = random_device() dim0 = random(1, 6) @@ -581,7 +581,7 @@ def test_einsum_openfold_usecase2(test_case): z = torch.einsum("...abc,...adc->...bdc", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_openfold_usecase3(test_case): device = random_device() dim0 = random(1, 6) @@ -595,7 +595,7 @@ def test_einsum_openfold_usecase3(test_case): z = torch.einsum("...qhd,...khd->...hqk", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_openfold_usecase4(test_case): device = random_device() dim0 = random(1, 6) @@ -609,7 +609,7 @@ def test_einsum_openfold_usecase4(test_case): z = torch.einsum("...vhf,...qhv->...qhf", x, y) return z - @autotest(n=20, check_graph=True) + @autotest(n=5) def test_einsum_openfold_usecase5(test_case): device = random_device() dim0 = random(1, 6) diff --git a/python/oneflow/test/expensive/test_sqrt_square_sum.py b/python/oneflow/test/expensive/test_sqrt_square_sum.py index a8756dd5ac5..1fbf0a22b2b 100644 --- a/python/oneflow/test/expensive/test_sqrt_square_sum.py +++ b/python/oneflow/test/expensive/test_sqrt_square_sum.py @@ -24,33 +24,32 @@ @flow.unittest.skip_unless_1n1d() class TestLinalgVectorNorm2D(flow.unittest.TestCase): - @autotest(n=30, auto_backward=False, check_graph=True, rtol=0.5, atol=0.5) + @autotest(n=2, auto_backward=False, check_graph=True, rtol=0.5, atol=0.5) def test_sqrt_sum_with_cpu_random_data(test_case): device = cpu_device() - x = random_tensor(ndim=4, dim1=30, dim2=40, dim3=50, requires_grad=False).to( + x = random_tensor(ndim=4, dim1=3, dim2=4, dim3=5, requires_grad=False).to( device ) y = torch.linalg.norm(x) return y @unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") - @autotest(n=30, auto_backward=False, check_graph=True) + @autotest(n=2, auto_backward=False, check_graph=True) def test_sqrt_sum_with_cuda_random_data(test_case): device = gpu_device() - x = random_tensor(ndim=4, dim1=100, dim2=100, dim3=100, requires_grad=False).to( + x = random_tensor(ndim=4, dim1=10, dim2=10, dim3=10, requires_grad=False).to( device ) y = torch.linalg.norm(x) return y - @autotest(n=30, auto_backward=False, check_graph=True, rtol=0.5, atol=0.5) + @autotest(n=2, auto_backward=False, check_graph=True, rtol=0.5, atol=0.5) def test_scalar_print_random_data(test_case): device = random_device() - x = random_tensor(ndim=4, dim1=30, dim2=40, dim3=50, requires_grad=False).to( + x = random_tensor(ndim=4, dim1=3, dim2=4, dim3=5, requires_grad=False).to( device ) y = torch.linalg.norm(x) - print(f"grad_norm {y.oneflow:.4f}\t") return y diff --git a/python/oneflow/test_utils/automated_test_util/torch_flow_dual_object.py b/python/oneflow/test_utils/automated_test_util/torch_flow_dual_object.py index 1deaeda6ad0..159ad0a8d47 100644 --- a/python/oneflow/test_utils/automated_test_util/torch_flow_dual_object.py +++ b/python/oneflow/test_utils/automated_test_util/torch_flow_dual_object.py @@ -1058,6 +1058,12 @@ def check_tensor_equality( torch_grad, flow_grad, rtol=rtol, atol=atol, equal_nan=True, ): print_note_fake_program(detail=True) + print("---------Grad Shape--------") + print(torch_grad.shape) + print(flow_grad.shape) + print( + f"Grads are not equal. PyTorch grad: \n{torch_grad}\n, OneFlow grad: \n{flow_grad}" + ) return False torch_numpy = torch_tensor.detach().cpu().numpy() oneflow_numpy = flow_tensor.numpy()