diff --git a/test/unit/gemm/device/gemm_universal_f16t_s8n_f16t_mixed_input_tensor_op_f32_sm80.cu b/test/unit/gemm/device/gemm_universal_f16t_s8n_f16t_mixed_input_tensor_op_f32_sm80.cu index 86d1da77..33d5195f 100644 --- a/test/unit/gemm/device/gemm_universal_f16t_s8n_f16t_mixed_input_tensor_op_f32_sm80.cu +++ b/test/unit/gemm/device/gemm_universal_f16t_s8n_f16t_mixed_input_tensor_op_f32_sm80.cu @@ -73,14 +73,14 @@ TEST(SM80_Device_GemmUniversal_f16t_s8n_f16t_mixed_input_tensor_op_f32, 128x128x ElementAccumulator, cutlass::arch::OpClassTensorOp, cutlass::arch::Sm80, - cutlass::gemm::GemmShape<128, 128, 64>, - cutlass::gemm::GemmShape<64, 64, 64>, + cutlass::gemm::GemmShape<128, 32, 64>, + cutlass::gemm::GemmShape<64, 16, 64>, cutlass::gemm::GemmShape<16, 8, 16>, cutlass::epilogue::thread::LinearCombination< ElementOutput, 128 / cutlass::sizeof_bits::value, ElementAccumulator, ElementAccumulator>, cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>, - 4, // Stages + 9, // Stages 8, // AlignmentA 16, // AlignmentB cutlass::arch::OpMultiplyAddMixedInputUpcast,