From be61384272907bc4882c4d09d427bad8f8e3dd28 Mon Sep 17 00:00:00 2001 From: amcamd Date: Tue, 4 Aug 2020 12:29:08 -0500 Subject: [PATCH] cherry-pick Remove workaround for K==0 from rocBLAS-internal commit fc9729ce50366c5da2d547f1c0321458129c6152 --- clients/gtest/gemm_gtest.yaml | 5 +++-- library/src/tensile_host.cpp | 19 +++++-------------- tensile_tag.txt | 2 +- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/clients/gtest/gemm_gtest.yaml b/clients/gtest/gemm_gtest.yaml index 4a72630af..479884ff3 100644 --- a/clients/gtest/gemm_gtest.yaml +++ b/clients/gtest/gemm_gtest.yaml @@ -2911,9 +2911,10 @@ Tests: alpha_beta: *alpha_beta_range K: 0 matrix_size: - - { M: 1, N: 2 } - - { M: 3, N: 5 } + - { M: 1, N: 2 } + - { M: 3, N: 5 } - { M: 512, N: 100 } - { M: 63, N: 512 } + - { M: 100, N: 1000 } ... diff --git a/library/src/tensile_host.cpp b/library/src/tensile_host.cpp index 3469ac643..be8acfaf1 100644 --- a/library/src/tensile_host.cpp +++ b/library/src/tensile_host.cpp @@ -140,11 +140,6 @@ namespace freeIndex[0].c = freeIndex[0].d = 0; freeIndex[1].c = freeIndex[1].d = 1; - // Tensile does not support 0-sized dimensions. For when k == 0, we still need to - // multiply C by beta, but not add any of the rank-0 dot products. As a workaround, - // we pass k = 1 and set alpha == 0, since alpha == 0 has the same effect as k == 0. - auto k = prob.k == 0 ? 1 : prob.k; - // clang-format off // If A is transposed, swap the free and bound dimensions and their ranks @@ -152,7 +147,7 @@ namespace { a = { Tensile_Ti, - {k, prob.m, prob.batch_count}, + {prob.k, prob.m, prob.batch_count}, {prob.row_stride_a, prob.col_stride_a, prob.batch_stride_a} }; freeIndex[0].i = 1; @@ -162,7 +157,7 @@ namespace { a = { Tensile_Ti, - {prob.m, k, prob.batch_count}, + {prob.m, prob.k, prob.batch_count}, {prob.row_stride_a, prob.col_stride_a, prob.batch_stride_a} }; freeIndex[0].i = 0; @@ -178,7 +173,7 @@ namespace { b = { Tensile_Ti, - {prob.n, k, prob.batch_count}, + {prob.n, prob.k, prob.batch_count}, {prob.row_stride_b, prob.col_stride_b, prob.batch_stride_b} }; freeIndex[1].i = 0; @@ -188,7 +183,7 @@ namespace { b = { Tensile_Ti, - {k, prob.n, prob.batch_count}, + {prob.k, prob.n, prob.batch_count}, {prob.row_stride_b, prob.col_stride_b, prob.batch_stride_b} }; freeIndex[1].i = 1; @@ -303,11 +298,7 @@ namespace // alpha and beta are stored by value in Tensile::TypedContractionInputs // alpha and beta are copied from host to Tensile::TypedContractionInputs - // We set alpha = 0 if k == 0 (see above) - if(prob.k == 0) - memset(&inputs.alpha, 0, sizeof(inputs.alpha)); - else - AlphaBeta::copy(&inputs.alpha, prob.alpha); + AlphaBeta::copy(&inputs.alpha, prob.alpha); AlphaBeta::copy(&inputs.beta, prob.beta); return inputs; diff --git a/tensile_tag.txt b/tensile_tag.txt index 854707180..316ecaff3 100644 --- a/tensile_tag.txt +++ b/tensile_tag.txt @@ -1 +1 @@ -19319869e83243c5e3ca649532d2951de0ba35be +af71ea890a893e647bf2cf4571a90297d65689ca