Sum enhancement in case of inner dim reduce (#2543)

* Initialize sum, modify layernorm
* FLOAT to FLOAT_ACCUM in kernel, fix kernel index and host test, and split teardown into runtest and verify
* remove unused var, int64_t to size_t, add two kernel profiles, fix kernel index error, change reqd_work_item_cnt
* Use GetMaxComputeUnits, fix GetSumWorkspaceSize flow
* Add doxygen, add test case
* remove MIOPEN_BETA_API
* modify tolerance, add solver list
* alignment
* add IsImprovementOverROCm, reduce to sqrt(reduce), modify test case
* throw to return false in performance check, duplicate code to function, fix wrong allocated memory size
* add experimental caution in doc, add memory copy check in driver, add detail in verify result of driver
* modify tolerance
* modify get input in driver
ROCm · Nov 30, 2023 · 9ec5dc0 · 9ec5dc0
1 parent d8e23ad
commit 9ec5dc0
Show file tree

Hide file tree

Showing 27 changed files with 1,832 additions and 20 deletions.
diff --git a/docs/apireference.rst b/docs/apireference.rst
@@ -22,4 +22,5 @@ API Reference
     dropout
     reduction
     layernorm
+    sum
 
diff --git a/docs/layernorm.rst b/docs/layernorm.rst
@@ -1,8 +1,9 @@
 
-Layernorm Layer
-===================
+Layernorm Layer(experimental)
+=============================
 
 The layernorm  types and functions.
+To enable this, define MIOPEN_BETA_API before including miopen.h.
 
 
 miopenLayerNormMode_t

diff --git a/docs/sum.rst b/docs/sum.rst
@@ -0,0 +1,23 @@
+
+Sum Layer(experimental)
+========================
+
+The sum types and functions.
+To enable this, define MIOPEN_BETA_API before including miopen.h.
+
+
+miopenSumNanPropagation_t
+----------------------------------
+
+.. doxygenenum::  miopenSumNanPropagation_t
+
+miopenGetSumWorkspaceSize
+----------------------------------
+
+.. doxygenfunction::  miopenGetSumWorkspaceSize
+
+miopenSumForward
+----------------------------------
+
+.. doxygenfunction::  miopenSumForward
+
diff --git a/driver/driver.hpp b/driver/driver.hpp
@@ -150,7 +150,7 @@ inline void PadBufferSize(size_t& sz, int datatype_sz)
     printf("Supported Base Arguments: conv[fp16|int8|bfp16|fp8|bfp8], CBAInfer[fp16], "
            "pool[fp16], lrn[fp16], "
            "activ[fp16], softmax[fp16], bnorm[fp16], rnn[fp16], gemm, ctc, dropout[fp16], "
-           "tensorop[fp16], reduce[fp16,fp64], layernorm[bfp16, fp16]\n");
+           "tensorop[fp16], reduce[fp16|fp64], layernorm[bfp16|fp16], sum[bfp16|fp16]\n");
     exit(0); // NOLINT (concurrency-mt-unsafe)
 }
 
@@ -172,7 +172,8 @@ inline std::string ParseBaseArg(int argc, char* argv[])
        arg != "rnn_seqfp16" && arg != "gemm" /*&& arg != "gemmfp16"*/ && arg != "ctc" &&
        arg != "dropout" && arg != "dropoutfp16" && arg != "tensorop" && arg != "tensoropfp16" &&
        arg != "reduce" && arg != "reducefp16" && arg != "reducefp64" && arg != "layernorm" &&
-       arg != "layernormfp16" && arg != "layernormbfp16" && arg != "--version")
+       arg != "layernormfp16" && arg != "layernormbfp16" && arg != "sum" && arg != "sumfp16" &&
+       arg != "sumbfp16" && arg != "--version")
     {
         printf("FAILED: Invalid Base Input Argument\n");
         Usage();

diff --git a/driver/layernorm_driver.hpp b/driver/layernorm_driver.hpp
@@ -139,7 +139,7 @@ int LayerNormDriver<Tgpu, Tref>::GetandSetData()
 {
     std::vector<int> in_len = GetInputTensorLengthsFromCmdLine();
 
-    dim = static_cast<int>(inflags.GetValueDouble("nomalized_dim"));
+    dim = inflags.GetValueInt("normalized_dim");
 
     std::vector<int> inner_len;
     if(dim == in_len.size())
@@ -379,10 +379,6 @@ Tref LayerNormDriver<Tgpu, Tref>::GetTolerance()
     {
         return 5e-5;
     }
-    else if(data_type == miopenDouble)
-    {
-        return 1e-10;
-    }
     else if(data_type == miopenBFloat16)
     {
         return 5e-3;

diff --git a/driver/main.cpp b/driver/main.cpp
@@ -41,9 +41,10 @@
 #include "dropout_driver.hpp"
 #include "tensorop_driver.hpp"
 #include "reduce_driver.hpp"
+#include "layernorm_driver.hpp"
+#include "sum_driver.hpp"
 #include <miopen/config.h>
 #include <miopen/stringutils.hpp>
-#include "layernorm_driver.hpp"
 
 int main(int argc, char* argv[])
 {
@@ -209,6 +210,18 @@ int main(int argc, char* argv[])
     {
         drv = new LayerNormDriver<bfloat16, float>();
     }
+    else if(base_arg == "sum")
+    {
+        drv = new SumDriver<float, float>();
+    }
+    else if(base_arg == "sumfp16")
+    {
+        drv = new SumDriver<float16, float>();
+    }
+    else if(base_arg == "sumbfp16")
+    {
+        drv = new SumDriver<bfloat16, float>();
+    }
     else
     {
         printf("Incorrect BaseArg\n");