diff --git a/test/unit/gemm/device/gemm_universal_f16t_s8n_f16t_mixed_input_tensor_op_f32_sm80.cu b/test/unit/gemm/device/gemm_universal_f16t_s8n_f16t_mixed_input_tensor_op_f32_sm80.cu
index 86d1da77..33d5195f 100644
--- a/test/unit/gemm/device/gemm_universal_f16t_s8n_f16t_mixed_input_tensor_op_f32_sm80.cu
+++ b/test/unit/gemm/device/gemm_universal_f16t_s8n_f16t_mixed_input_tensor_op_f32_sm80.cu
@@ -73,14 +73,14 @@ TEST(SM80_Device_GemmUniversal_f16t_s8n_f16t_mixed_input_tensor_op_f32, 128x128x
     ElementAccumulator, 
     cutlass::arch::OpClassTensorOp, 
     cutlass::arch::Sm80,
-    cutlass::gemm::GemmShape<128, 128, 64>,
-    cutlass::gemm::GemmShape<64, 64, 64>,
+    cutlass::gemm::GemmShape<128, 32, 64>,
+    cutlass::gemm::GemmShape<64, 16, 64>,
     cutlass::gemm::GemmShape<16, 8, 16>,
       cutlass::epilogue::thread::LinearCombination<
           ElementOutput, 128 / cutlass::sizeof_bits<ElementOutput>::value,
           ElementAccumulator, ElementAccumulator>,
     cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>, 
-    4,  // Stages
+    9,  // Stages
     8,  // AlignmentA
     16, // AlignmentB
     cutlass::arch::OpMultiplyAddMixedInputUpcast,