diff --git a/driver/argmax_driver.hpp b/driver/argmax_driver.hpp index d0ca256433..f3a8aed1ac 100644 --- a/driver/argmax_driver.hpp +++ b/driver/argmax_driver.hpp @@ -54,7 +54,7 @@ int32_t mloArgmaxForwardRunHost(miopenTensorDescriptor_t inputDesc, int32_t reduce_size = static_cast(input_dims[dim]); auto output_numel = - std::accumulate(output_dims.begin(), output_dims.end(), 1L, std::multiplies()); + std::accumulate(output_dims.begin(), output_dims.end(), 1LL, std::multiplies()); auto inner_size = std::accumulate( input_dims.begin() + dim + 1, input_dims.end(), 1ULL, std::multiplies()); diff --git a/driver/conv_driver.hpp b/driver/conv_driver.hpp index 5e5ca32047..6e9e5fb0b6 100644 --- a/driver/conv_driver.hpp +++ b/driver/conv_driver.hpp @@ -205,7 +205,7 @@ class GpumemTensor return; } - for(int i = 0; i < sz; ++i) + for(size_t i = 0; i < sz; ++i) { /// \anchor move_rand /// Generate random value, even if buffer is unused. This provides the same @@ -1559,7 +1559,7 @@ int ConvDriver::AllocateBuffersAndCopy() if(!biasFileName.empty()) read = readBufferFromFile(b_int8.data(), b_sz, biasFileName.c_str()); if(!read) - for(int i = 0; i < b_sz; i++) + for(size_t i = 0; i < b_sz; ++i) b_int8[i] = static_cast(i % 8) + prng::gen_canonical(); } std::ignore = b.AllocOnDeviceAndInit(q, ctx, b_sz, b_int8); @@ -1602,15 +1602,20 @@ int ConvDriver::AllocateBuffersAndCopy() if(!is_gpualloc) { - for(int i = 0; i < b_sz; i++) + for(size_t i = 0; i < b_sz; ++i) { if(!b_read) { - b.GetVector()[i] = static_cast(i % 8) // + /// (i % 8) can't be converted to F8 type as there is no suitable + /// conversion, but we have conversions from int and from uint8_t. + /// int is not good as it would produce negative results + /// after truncation of size_t, while we want positive values. + /// uint8_t is fine because (i % 8) fits into 3 bits. + b.GetVector()[i] = static_cast(static_cast(i) % 8) // + (is_fp8 ? 
prng::gen_A_to_B(Data_min, Data_max) // : prng::gen_canonical()); } - db.GetVector()[i] = static_cast(i % 8) // + db.GetVector()[i] = static_cast(static_cast(i) % 8) // + (is_fp8 ? prng::gen_A_to_B(Data_min, Data_max) // : prng::gen_canonical()); } @@ -2415,7 +2420,7 @@ int ConvDriver::RunForwardGPUReference() { auto out_tmp = tensor(miopen::deref(outputTensor)); out.CopyFromDeviceToHost(GetStream(), out_tmp); - for(int i = 0; i < out_tmp.data.size(); i++) + for(size_t i = 0; i < out_tmp.data.size(); ++i) { outhost.data[i] = static_cast(out_tmp.data[i]); } @@ -3326,7 +3331,7 @@ int ConvDriver::RunBackwardWeightsGPUReference() { auto dwei_tmp = tensor(miopen::deref(weightTensor)); dwei.CopyFromDeviceToHost(GetStream(), dwei_tmp); - for(int i = 0; i < dwei_tmp.data.size(); i++) + for(size_t i = 0; i < dwei_tmp.data.size(); ++i) { dwei_host.data[i] = static_cast(dwei_tmp.data[i]); } @@ -3377,7 +3382,7 @@ int ConvDriver::RunBackwardDataGPUReference() { auto din_tmp = tensor(miopen::deref(inputTensor)); din.CopyFromDeviceToHost(GetStream(), din_tmp); - for(int i = 0; i < din_tmp.data.size(); i++) + for(size_t i = 0; i < din_tmp.data.size(); ++i) { din_host.data[i] = static_cast(din_tmp.data[i]); } @@ -3433,6 +3438,10 @@ std::string ConvDriver::GetVerificationCacheFileName( { return "int8"; } + if(std::is_same::value) + { + return "int32"; + } else if(std::is_same::value) { return "float16"; diff --git a/driver/ctc_driver.hpp b/driver/ctc_driver.hpp index fe9c27bdb2..c7fa9f02e6 100644 --- a/driver/ctc_driver.hpp +++ b/driver/ctc_driver.hpp @@ -251,7 +251,7 @@ template int CTCDriver::AllocateBuffersAndCopy() { size_t probs_sz = batch_size * (num_class + 1) * max_time_step; - size_t labels_sz = std::accumulate(labelLengths.begin(), labelLengths.end(), 0); + size_t labels_sz = std::accumulate(labelLengths.begin(), labelLengths.end(), 0ULL); size_t workSpaceSize; size_t workSpaceSizeCPU; diff --git a/driver/rnn_driver.hpp b/driver/rnn_driver.hpp index f93b719336..03da917e0a 
100644 --- a/driver/rnn_driver.hpp +++ b/driver/rnn_driver.hpp @@ -618,7 +618,7 @@ int RNNDriver::AllocateBuffersAndCopy() int nseq = inflags.GetValueInt("seq_len"); std::vector in_n = GetInputTensorLengthsFromCmdLine(); std::size_t inputBatchLenSum; - inputBatchLenSum = std::accumulate(in_n.begin(), in_n.begin() + nseq, 0); + inputBatchLenSum = std::accumulate(in_n.begin(), in_n.begin() + nseq, 0ULL); int hid_h = inflags.GetValueInt("hid_h"); int layer = inflags.GetValueInt("num_layer"); diff --git a/driver/rnn_seq_driver.hpp b/driver/rnn_seq_driver.hpp index 2d5636e062..8d5b720960 100644 --- a/driver/rnn_seq_driver.hpp +++ b/driver/rnn_seq_driver.hpp @@ -766,7 +766,7 @@ inline size_t Get3DNoVECTensorSize(miopenTensorDescriptor_t& tensor) assert(miopen::deref(tensor).IsPacked() && "GetTensorSize should not be used on an unpacked tensor."); const auto len = GetTensorLengths(tensor); - size_t sz = std::accumulate(len.begin(), len.end(), 1, std::multiplies()); + size_t sz = std::accumulate(len.begin(), len.end(), 1ULL, std::multiplies()); return sz; } @@ -827,7 +827,7 @@ int RNNSeqDriver::AllocateBuffersAndCopy() const std::vector out_lens = GetOutputTensorLengthsFromCmdLine(); const size_t vectors_cnt_host = - std::accumulate(sorted_seq_lens.begin(), sorted_seq_lens.end(), 0); + std::accumulate(sorted_seq_lens.begin(), sorted_seq_lens.end(), 0ULL); const size_t vectors_cnt_gpu = io_layout == miopenRNNDataSeqMajorNotPadded ? 
vectors_cnt_host : in_lens[0] * in_lens[1]; diff --git a/driver/sum_driver.hpp b/driver/sum_driver.hpp index b348d3a22f..03589e29e6 100644 --- a/driver/sum_driver.hpp +++ b/driver/sum_driver.hpp @@ -58,7 +58,7 @@ int32_t mloSumForwardRunHost(miopenTensorDescriptor_t inputDesc, auto reduce_size = input_dims[dim]; auto output_numel = - std::accumulate(output_dims.begin(), output_dims.end(), 1L, std::multiplies()); + std::accumulate(output_dims.begin(), output_dims.end(), 1LL, std::multiplies()); auto inner_size = 1ULL; for(int32_t i = dim + 1; i < input_dims.size(); i++) diff --git a/driver/tensor_driver.hpp b/driver/tensor_driver.hpp index 71b7e9813b..f6868fab98 100644 --- a/driver/tensor_driver.hpp +++ b/driver/tensor_driver.hpp @@ -181,6 +181,14 @@ inline int SetTensorNd(miopenTensorDescriptor_t t, return miopenSetTensorDescriptor(t, data_type, len.size(), len.data(), strides.data()); } +inline int SetTensorNd(miopenTensorDescriptor_t t, + std::vector& len, + std::vector& strides, + miopenDataType_t data_type = miopenFloat) +{ + return miopenSetTensorDescriptorV2(t, data_type, len.size(), len.data(), strides.data()); +} + inline int SetTensorNd(miopenTensorDescriptor_t t, std::vector& len, const std::string& layout, @@ -208,10 +216,10 @@ inline int SetTensorNd(miopenTensorDescriptor_t t, return SetTensorNd(t, len, data_type); } - std::vector strides; - miopen::tensor_layout_to_strides(len, len_layout, layout, strides); - - return SetTensorNd(t, len, strides, data_type); + std::vector strides2; + std::vector len2(len.cbegin(), len.cend()); + miopen::tensor_layout_to_strides(len2, len_layout, layout, strides2); + return SetTensorNd(t, len2, strides2, data_type); } // This function ignores tensor strides completely and its result should not be interpreted as @@ -222,8 +230,8 @@ inline size_t GetTensorSize(const miopenTensorDescriptor_t& tensor) { assert(miopen::deref(tensor).IsPacked() && "GetTensorSize should not be used on an unpacked tensor."); - const auto len = 
GetTensorLengths(tensor); - const auto vectorLength = GetTensorVectorLength(tensor); + const auto len = GetTensorLengths(tensor); + const size_t vectorLength = GetTensorVectorLength(tensor); size_t sz = std::accumulate(len.begin(), len.end(), vectorLength, std::multiplies()); return sz; diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index a06dc8c2a4..89a3060c9e 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -749,6 +749,16 @@ MIOPEN_EXPORT miopenStatus_t miopenSetTensorDescriptor(miopenTensorDescriptor_t const int* dimsA, const int* stridesA); +#ifdef MIOPEN_BETA_API +/*! @copydoc miopenSetTensorDescriptor() + */ +MIOPEN_EXPORT miopenStatus_t miopenSetTensorDescriptorV2(miopenTensorDescriptor_t tensorDesc, + miopenDataType_t dataType, + int nbDims, + const size_t* dimsA, + const size_t* stridesA); +#endif + #ifdef MIOPEN_BETA_API /*! @brief Set the tensor cast type * diff --git a/src/include/miopen/conv/problem_description.hpp b/src/include/miopen/conv/problem_description.hpp index 5ff81e3833..8bba1ba3c5 100644 --- a/src/include/miopen/conv/problem_description.hpp +++ b/src/include/miopen/conv/problem_description.hpp @@ -348,6 +348,12 @@ struct ProblemDescription : ProblemDescriptionBase return in.AllDimsFitIntoInt() && weights.AllDimsFitIntoInt() && out.AllDimsFitIntoInt(); } + bool AllTensorsLengthsFitIntoInt() const + { + return in.AllLengthsFitIntoInt() && weights.AllLengthsFitIntoInt() && + out.AllLengthsFitIntoInt(); + } + void HeuristicUpdateLayouts(); void MakeNetworkConfig(std::string& conf_key) const; diff --git a/src/include/miopen/oclkernel.hpp b/src/include/miopen/oclkernel.hpp index 8c00c64640..6b42aa2b3b 100644 --- a/src/include/miopen/oclkernel.hpp +++ b/src/include/miopen/oclkernel.hpp @@ -149,7 +149,7 @@ class OCLKernel { assert(!gdims.empty() && gdims.size() <= 3); assert(!ldims.empty() && ldims.size() <= 3); - if(std::accumulate(ldims.begin(), ldims.end(), 1, std::multiplies{}) > + 
if(std::accumulate(ldims.begin(), ldims.end(), 1ULL, std::multiplies{}) > 256) // FIXME: get ldims limit from runtime { std::fill(ldims.begin(), ldims.end(), 0); diff --git a/src/include/miopen/rnn_util.hpp b/src/include/miopen/rnn_util.hpp index d3a95b598f..92876b8a9a 100644 --- a/src/include/miopen/rnn_util.hpp +++ b/src/include/miopen/rnn_util.hpp @@ -193,7 +193,7 @@ struct RNNTensorPaddingConverter size_t total_batch = std::accumulate( desc_array.data, desc_array.data + desc_array.size(), - 0, + 0ULL, [](size_t x, miopenTensorDescriptor_t y) { return x + deref(y).GetLengths()[0]; }); return GetTempPackedBuffersSpace(rnn_desc, total_batch, desc_array[0].GetLengths()[1]); @@ -255,7 +255,7 @@ struct RNNTensorBaseLayoutConverter size_t total_batch = std::accumulate( desc_array.data, desc_array.data + desc_array.size(), - 0, + 0ULL, [](size_t x, miopenTensorDescriptor_t y) { return x + deref(y).GetLengths()[0]; }); return GetTempPackedBuffersSpace(rnn_desc, total_batch, desc_array[0].GetLengths()[1]); diff --git a/src/include/miopen/tensor.hpp b/src/include/miopen/tensor.hpp index 658cbcd583..437668cfcb 100644 --- a/src/include/miopen/tensor.hpp +++ b/src/include/miopen/tensor.hpp @@ -171,10 +171,19 @@ struct MIOPEN_EXPORT TensorDescriptor : miopenTensorDescriptor // Use only for external API static TensorDescriptor MakeDescriptor(miopenDataType_t t, const int* plens, int size); + static TensorDescriptor MakeDescriptor(miopenDataType_t t, const std::size_t* plens, int size); static TensorDescriptor MakeDescriptor(miopenDataType_t t, miopenTensorLayout_t layout, const int* plens, int size); + static TensorDescriptor MakeDescriptor(miopenDataType_t t, + miopenTensorLayout_t layout, + const std::size_t* plens, + int size); static TensorDescriptor MakeDescriptor(miopenDataType_t t, const int* plens, const int* pstrides, int size); + static TensorDescriptor MakeDescriptor(miopenDataType_t t, + const std::size_t* plens, + const std::size_t* pstrides, + int size); bool 
IsVectorized() const; @@ -205,7 +214,10 @@ struct MIOPEN_EXPORT TensorDescriptor : miopenTensorDescriptor } bool IsPacked() const; + /// Checks all lengths and strides. bool AllDimsFitIntoInt() const; + /// Checks only lengths. + bool AllLengthsFitIntoInt() const; bool operator==(const TensorDescriptor& rhs) const; bool operator!=(const TensorDescriptor& rhs) const; diff --git a/src/include/miopen/tensor_layout.hpp b/src/include/miopen/tensor_layout.hpp index 93c09e171a..f5659d7dd3 100644 --- a/src/include/miopen/tensor_layout.hpp +++ b/src/include/miopen/tensor_layout.hpp @@ -62,18 +62,21 @@ void tensor_layout_to_strides(const std::vector& len, } return std::accumulate(layout.begin() + pos + 1, layout.end(), - 1, + static_cast(1), [&dim_to_len](T accumulator, char l) { return accumulator * dim_to_len[l]; }); }); } +/// \brief Version for vectorized layouts. +/// +/// \todo Generalize with non-vectorized version, 90% of code is the same. template void tensor_layout_to_strides(const std::vector& len, const std::string& len_layout, const std::string& layout, - const int vector, + const std::size_t vector_size, std::vector& strides) { const std::string base_layout = layout.substr(0, len.size()); @@ -91,7 +94,7 @@ void tensor_layout_to_strides(const std::vector& len, len_layout.begin(), len_layout.end(), std::back_inserter(strides), - [&base_layout, &vector, &dim_to_len](char cur_layout_char) { + [&base_layout, &vector_size, &dim_to_len](char cur_layout_char) { auto pos = base_layout.find(cur_layout_char); if(pos == std::string::npos) { @@ -100,7 +103,7 @@ void tensor_layout_to_strides(const std::vector& len, return std::accumulate( base_layout.begin() + pos + 1, base_layout.end(), - vector, + vector_size, [&dim_to_len](T accumulator, char l) { return accumulator * dim_to_len[l]; }); }); } diff --git a/src/mlo_dir_conv.cpp b/src/mlo_dir_conv.cpp index c498414d0f..55b450e7fc 100644 --- a/src/mlo_dir_conv.cpp +++ b/src/mlo_dir_conv.cpp @@ -49,15 +49,6 @@ #include #endif 
-// Only select the first applicable igemm solver due to long compilation time -// (JIRA SWDEV-227826) -/// \todo enable all applicable solvers of igemm after fixing slow compilation -#define WORKAROUND_SWDEV_227826 0 - -#if WORKAROUND_SWDEV_227826 -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS) -#endif - miopen::PerformanceDb miopen::GetDb(const miopen::ExecutionContext& ctx) { return {DbKinds::PerfDb, ctx.GetPerfDbPath(), ctx.GetUserPerfDbPath()}; @@ -260,14 +251,7 @@ std::vector> FindAllImplicitGemmWorkspaceSizes(const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) { -#if WORKAROUND_SWDEV_227826 - if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS))) - return GetImplicitGemmSolvers().GetWorkspaceSizes(ctx, problem); - else - return GetImplicitGemmSolvers().GetWorkspaceSizes(ctx, problem, 1); -#else return GetImplicitGemmSolvers().GetWorkspaceSizes(ctx, problem); -#endif } std::vector @@ -275,15 +259,7 @@ FindAllImplicitGemmSolutions(const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem, const miopen::AnyInvokeParams& invoke_ctx) { -#if WORKAROUND_SWDEV_227826 - if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS))) - return GetImplicitGemmSolvers().SearchForAllSolutions(ctx, problem, GetDb(ctx), invoke_ctx); - else - return GetImplicitGemmSolvers().SearchForAllSolutions( - ctx, problem, GetDb(ctx), invoke_ctx, 1); -#else return GetImplicitGemmSolvers().SearchForAllSolutions(ctx, problem, GetDb(ctx), invoke_ctx); -#endif } std::vector @@ -313,14 +289,7 @@ std::vector> FindImplicitGemmWrWWorkspaceSizes(const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) { -#if WORKAROUND_SWDEV_227826 - if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS))) - return GetImplicitGemmWrWSolvers().GetWorkspaceSizes(ctx, problem); - else - return GetImplicitGemmWrWSolvers().GetWorkspaceSizes(ctx, problem, 
1); -#else return GetImplicitGemmWrWSolvers().GetWorkspaceSizes(ctx, problem); -#endif } std::vector @@ -328,16 +297,7 @@ FindImplicitGemmWrWAllSolutions(const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem, const miopen::AnyInvokeParams& invoke_ctx) { -#if WORKAROUND_SWDEV_227826 - if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS))) - return GetImplicitGemmWrWSolvers().SearchForAllSolutions( - ctx, problem, GetDb(ctx), invoke_ctx); - else - return GetImplicitGemmWrWSolvers().SearchForAllSolutions( - ctx, problem, GetDb(ctx), invoke_ctx, 1); -#else return GetImplicitGemmWrWSolvers().SearchForAllSolutions(ctx, problem, GetDb(ctx), invoke_ctx); -#endif } std::vector diff --git a/src/ocl/tensorocl.cpp b/src/ocl/tensorocl.cpp index 4842ad297b..6fd8a172cf 100644 --- a/src/ocl/tensorocl.cpp +++ b/src/ocl/tensorocl.cpp @@ -1432,9 +1432,8 @@ void SetTensor(const Handle& handle, #ifndef NDEBUG if(yDesc.GetNumDims() != yDesc_flat.GetNumDims()) { - MIOPEN_LOG_I2(__func__ << std::endl - << "real descriptor: " << yDesc << std::endl - << "flat descriptor: " << yDesc_flat << std::endl); + MIOPEN_LOG_I2("real descriptor: " << yDesc); + MIOPEN_LOG_I2("flat descriptor: " << yDesc_flat); } #endif @@ -1586,9 +1585,8 @@ void ScaleTensor(const Handle& handle, #ifndef NDEBUG if(yDesc.GetNumDims() != yDesc_flat.GetNumDims()) { - MIOPEN_LOG_I2(__func__ << std::endl - << "real descriptor: " << yDesc << std::endl - << "flat descriptor: " << yDesc_flat << std::endl); + MIOPEN_LOG_I2("real descriptor: " << yDesc); + MIOPEN_LOG_I2("flat descriptor: " << yDesc_flat); } #endif @@ -1765,11 +1763,10 @@ void CopyTensor(const Handle& handle, #ifndef NDEBUG if(srcDesc.GetNumDims() != srcDesc_flat.GetNumDims()) { - MIOPEN_LOG_I2(__func__ << std::endl - << "src real descriptor: " << srcDesc << std::endl - << "src flat descriptor: " << srcDesc_flat << std::endl - << "dst real descriptor: " << dstDesc << std::endl - << "dst flat descriptor: " << 
dstDesc_flat << std::endl); + MIOPEN_LOG_I2("src real descriptor: " << srcDesc); + MIOPEN_LOG_I2("src flat descriptor: " << srcDesc_flat); + MIOPEN_LOG_I2("dst real descriptor: " << dstDesc); + MIOPEN_LOG_I2("dst flat descriptor: " << dstDesc_flat); } #endif @@ -1975,11 +1972,10 @@ void CastTensor(const Handle& handle, #ifndef NDEBUG if(srcDesc.GetNumDims() != srcDesc_flat.GetNumDims()) { - MIOPEN_LOG_I2(__func__ << std::endl - << "src real descriptor: " << srcDesc << std::endl - << "src flat descriptor: " << srcDesc_flat << std::endl - << "dst real descriptor: " << dstDesc << std::endl - << "dst flat descriptor: " << dstDesc_flat << std::endl); + MIOPEN_LOG_I2("src real descriptor: " << srcDesc); + MIOPEN_LOG_I2("src flat descriptor: " << srcDesc_flat); + MIOPEN_LOG_I2("dst real descriptor: " << dstDesc); + MIOPEN_LOG_I2("dst flat descriptor: " << dstDesc_flat); } #endif @@ -2259,16 +2255,14 @@ void TransformTensor(const Handle& handle, #ifndef NDEBUG if(xDesc.GetNumDims() != xDesc_flat.GetNumDims()) { - MIOPEN_LOG_I2(__func__ << std::endl - << "real descriptor: " << xDesc << std::endl - << "flat descriptor: " << xDesc_flat << std::endl); + MIOPEN_LOG_I2("x real descriptor: " << xDesc); + MIOPEN_LOG_I2("x flat descriptor: " << xDesc_flat); } if(yDesc.GetNumDims() != yDesc_flat.GetNumDims()) { - MIOPEN_LOG_I2(__func__ << std::endl - << "real descriptor: " << yDesc << std::endl - << "flat descriptor: " << yDesc_flat << std::endl); + MIOPEN_LOG_I2("y real descriptor: " << yDesc); + MIOPEN_LOG_I2("y flat descriptor: " << yDesc_flat); } #endif diff --git a/src/rnn.cpp b/src/rnn.cpp index 6caf17a742..c23b6a0bc7 100644 --- a/src/rnn.cpp +++ b/src/rnn.cpp @@ -527,7 +527,7 @@ size_t RNNDescriptor::GetWorkspaceSize(Handle& handle, std::size_t total_sequence_len = 0; total_sequence_len = std::accumulate( - xDesc.data, xDesc.data + seqLength, 0, [](size_t x, miopenTensorDescriptor_t y) { + xDesc.data, xDesc.data + seqLength, 0ULL, [](size_t x, miopenTensorDescriptor_t y) { 
return x + deref(y).GetLengths()[0]; }); @@ -589,7 +589,7 @@ size_t RNNDescriptor::GetReserveSize(Handle& /* handle */, } std::size_t inputBatchLenSum = 0; inputBatchLenSum = std::accumulate( - xDesc.data, xDesc.data + seqLength, 0, [](size_t x, miopenTensorDescriptor_t y) { + xDesc.data, xDesc.data + seqLength, 0ULL, [](size_t x, miopenTensorDescriptor_t y) { return x + deref(y).GetLengths()[0]; }); return GetReserveSize(inputBatchLenSum); @@ -646,7 +646,7 @@ size_t RNNDescriptor::GetRNNInputSuperTensorSize(Handle& /* handle */, if(paddingMode == miopenRNNIONotPadded) { inputBatchLenSum = std::accumulate( - xDesc.data, xDesc.data + seqLength, 0, [](size_t x, miopenTensorDescriptor_t y) { + xDesc.data, xDesc.data + seqLength, 0ULL, [](size_t x, miopenTensorDescriptor_t y) { return x + deref(y).GetLengths()[0]; }); } diff --git a/src/solver/conv_direct_naive_conv_bwd.cpp b/src/solver/conv_direct_naive_conv_bwd.cpp index abd286509d..5d8ed5d03d 100644 --- a/src/solver/conv_direct_naive_conv_bwd.cpp +++ b/src/solver/conv_direct_naive_conv_bwd.cpp @@ -49,7 +49,7 @@ bool ConvDirectNaiveConvBwd::IsApplicable(const ExecutionContext& ctx, if(!problem.IsDirectionBackwardData()) return false; - if(!problem.AllTensorsDimsFitIntoInt()) + if(!problem.AllTensorsLengthsFitIntoInt()) return false; if(!problem.IsLayoutDefault() && !problem.IsLayoutNHWC()) return false; diff --git a/src/solver/conv_direct_naive_conv_fwd.cpp b/src/solver/conv_direct_naive_conv_fwd.cpp index 8e38537be4..c9ca46ffeb 100644 --- a/src/solver/conv_direct_naive_conv_fwd.cpp +++ b/src/solver/conv_direct_naive_conv_fwd.cpp @@ -56,7 +56,7 @@ bool ConvDirectNaiveConvFwd::IsApplicable(const ExecutionContext& ctx, if(!problem.IsDirectionForward()) return false; - if(!problem.AllTensorsDimsFitIntoInt()) + if(!problem.AllTensorsLengthsFitIntoInt()) return false; if(problem.IsTensorsCasted()) diff --git a/src/solver/conv_direct_naive_conv_wrw.cpp b/src/solver/conv_direct_naive_conv_wrw.cpp index 
95e7e75f7b..c937ec63cc 100644 --- a/src/solver/conv_direct_naive_conv_wrw.cpp +++ b/src/solver/conv_direct_naive_conv_wrw.cpp @@ -56,7 +56,7 @@ bool ConvDirectNaiveConvWrw::IsApplicable(const ExecutionContext& ctx, if(!problem.IsDirectionBackwardWrW()) return false; - if(!problem.AllTensorsDimsFitIntoInt()) + if(!problem.AllTensorsLengthsFitIntoInt()) return false; if(problem.IsTensorsCasted()) { diff --git a/src/tensor.cpp b/src/tensor.cpp index 484b2cda76..1d2ed405af 100644 --- a/src/tensor.cpp +++ b/src/tensor.cpp @@ -272,6 +272,12 @@ TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, const int* return MakeDescriptor(t, GetDefaultLayout(), plens, size); } +TensorDescriptor +TensorDescriptor::MakeDescriptor(miopenDataType_t t, const std::size_t* plens, int size) +{ + return MakeDescriptor(t, GetDefaultLayout(), plens, size); +} + TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, miopenTensorLayout_t layout, const int* plens, @@ -283,6 +289,17 @@ TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, return {t, layout, std::vector(plens, plens + size)}; } +TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, + miopenTensorLayout_t layout, + const std::size_t* plens, + int size) +{ + if(plens == nullptr || size <= 0) + MIOPEN_THROW(miopenStatusInvalidValue); + + return {t, layout, std::vector(plens, plens + size)}; +} + TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, const int* plens, const int* pstrides, @@ -294,6 +311,19 @@ TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, return {t, std::vector(plens, plens + size), std::vector(pstrides, pstrides + size)}; } +TensorDescriptor TensorDescriptor::MakeDescriptor(miopenDataType_t t, + const std::size_t* plens, + const std::size_t* pstrides, + int size) +{ + if(plens == nullptr || pstrides == nullptr || size <= 0) + MIOPEN_THROW(miopenStatusInvalidValue); + + return {t, + std::vector(plens, plens + size), + 
std::vector(pstrides, pstrides + size)}; +} + void TensorDescriptor::CalculateStrides() { if(lens.empty()) @@ -425,7 +455,7 @@ std::size_t TensorDescriptor::GetNumBytes() const bool TensorDescriptor::IsPacked() const { return this->packed; } -bool TensorDescriptor::AllDimsFitIntoInt() const +bool TensorDescriptor::AllLengthsFitIntoInt() const { if(std::any_of(lens.cbegin(), lens.cend(), [](std::size_t x) { return x > std::numeric_limits::max(); @@ -433,6 +463,13 @@ bool TensorDescriptor::AllDimsFitIntoInt() const { return false; } + return true; +} + +bool TensorDescriptor::AllDimsFitIntoInt() const +{ + if(!AllLengthsFitIntoInt()) + return false; if(std::any_of(strides.cbegin(), strides.cend(), [](std::size_t x) { return x > std::numeric_limits::max(); })) diff --git a/src/tensor_api.cpp b/src/tensor_api.cpp index 1b0209ecd2..494994b1d6 100644 --- a/src/tensor_api.cpp +++ b/src/tensor_api.cpp @@ -199,6 +199,33 @@ extern "C" miopenStatus_t miopenSetTensorDescriptor(miopenTensorDescriptor_t ten }); } +extern "C" miopenStatus_t miopenSetTensorDescriptorV2(miopenTensorDescriptor_t tensorDesc, + miopenDataType_t dataType, + int nbDims, + const size_t* dimsA, + const size_t* stridesA) +{ + if(miopen::IsLoggingFunctionCalls()) + { + const miopen::logger::CArray dim(dimsA, nbDims); + const miopen::logger::CArray stride(stridesA, nbDims); + MIOPEN_LOG_FUNCTION(tensorDesc, dataType, nbDims, dim.values, stride.values); + } + + return miopen::try_([&] { + if(stridesA == nullptr) + { + miopen::deref(tensorDesc) = + miopen::TensorDescriptor::MakeDescriptor(dataType, dimsA, nbDims); + } + else + { + miopen::deref(tensorDesc) = + miopen::TensorDescriptor::MakeDescriptor(dataType, dimsA, stridesA, nbDims); + } + }); +} + extern "C" miopenStatus_t miopenSetTensorCastType(miopenTensorDescriptor_t tensorDesc, miopenDataType_t cast_type) { diff --git a/test/conv3d.hpp b/test/conv3d.hpp index 2b49c0f10e..33542587f4 100644 --- a/test/conv3d.hpp +++ b/test/conv3d.hpp @@ -56,5 +56,11 @@ 
struct conv3d_driver : conv_driver this->add(this->in_layout, "in_layout", this->generate_data({"NCDHW"})); this->add(this->fil_layout, "fil_layout", this->generate_data({"NCDHW"})); this->add(this->out_layout, "out_layout", this->generate_data({"NCDHW"})); + this->add(this->deterministic, "deterministic", this->generate_data({false})); + this->add(this->tensor_vect, "tensor_vect", this->generate_data({0})); + this->add(this->vector_length, "vector_length", this->generate_data({1})); + // Only valid for int8 input and weights + this->add(this->output_type, "output_type", this->generate_data({"int32"})); + this->add(this->int8_vectorize, "int8_vectorize", this->generate_data({false})); } }; diff --git a/test/conv3d_find2.cpp b/test/conv3d_find2.cpp index 0a36c8958a..198b6890cc 100644 --- a/test/conv3d_find2.cpp +++ b/test/conv3d_find2.cpp @@ -56,6 +56,12 @@ struct conv3d_find2_driver : conv_driver this->add(this->in_layout, "in_layout", this->generate_data({"NCDHW"})); this->add(this->fil_layout, "fil_layout", this->generate_data({"NCDHW"})); this->add(this->out_layout, "out_layout", this->generate_data({"NCDHW"})); + this->add(this->deterministic, "deterministic", this->generate_data({false})); + this->add(this->tensor_vect, "tensor_vect", this->generate_data({0})); + this->add(this->vector_length, "vector_length", this->generate_data({1})); + // Only valid for int8 input and weights + this->add(this->output_type, "output_type", this->generate_data({"int32"})); + this->add(this->int8_vectorize, "int8_vectorize", this->generate_data({false})); } }; diff --git a/test/conv_common.hpp b/test/conv_common.hpp index acb29dc29f..9bcba030fc 100644 --- a/test/conv_common.hpp +++ b/test/conv_common.hpp @@ -2152,9 +2152,12 @@ struct conv_driver : test_driver bool is_bfloat16 = (input.desc.GetType() == miopenBFloat16 && weights.desc.GetType() == miopenBFloat16); - // bfloat16 is not supported for conv3d if(is_bfloat16 && !(filter.spatialDim == 2)) + { + show_command(); + 
std::cout << "Skipped: bfloat16 is supported for 2D conv only" << std::endl; return; + } if(((filter.mode == miopenTranspose) && ((filter.group_count == 1 && in_c_len == wei_k_len) || @@ -2171,6 +2174,8 @@ struct conv_driver : test_driver { if(miopen::any_of(filter.GetConvStrides(), [](auto v) { return v == 0; })) { + show_command(); + std::cout << "Skipped: stride[i] == 0" << std::endl; return; } @@ -2198,13 +2203,19 @@ struct conv_driver : test_driver if(miopen::any_of(out_spatial_len, [](auto v) { return v <= 0; })) { + show_command(); + std::cout << "Skipped: out_spatial_len[i] <= 0" << std::endl; return; } } else if(filter.paddingMode == miopenPaddingValid) { if(miopen::any_of(filter.GetConvStrides(), [](auto v) { return v == 0; })) + { + show_command(); + std::cout << "Skipped: stride[i] == 0" << std::endl; return; + } std::vector out_spatial_len(spatial_dim); @@ -2220,6 +2231,8 @@ struct conv_driver : test_driver if(miopen::any_of(out_spatial_len, [](auto v) { return v <= 0; })) { + show_command(); + std::cout << "Skipped: out_spatial_len[i] <= 0" << std::endl; return; } } @@ -2293,16 +2306,6 @@ struct conv_driver : test_driver } #endif - // bwd53 kernel (large images supported) doesnt support stride !=1 and dilation and - // pad. 
- if(filter.GetSpatialDimension() == 2 && in_spatial_len[1] >= 2048 && - ((filter.GetConvStrides()[0] != 1) || (filter.GetConvStrides()[1] != 1) || - (filter.GetConvDilations()[0] != 1) || (filter.GetConvDilations()[1] != 1) || - (filter.GetConvPads()[1] != 0) || (filter.GetConvPads()[0] != 0))) - { - return; - } - input.generate(gen_positive_value); output.generate(gen_positive_value); weights.generate(gen_sign_value); @@ -2429,6 +2432,12 @@ struct conv_driver : test_driver search, int8_vectorize}); } + else + { + show_command(); + std::cout << "FAILED: bad output_type: '" << output_type << '\'' + << std::endl; + } } else { diff --git a/test/cpu_argmax.hpp b/test/cpu_argmax.hpp index c70487019e..08f40c9c01 100644 --- a/test/cpu_argmax.hpp +++ b/test/cpu_argmax.hpp @@ -36,7 +36,7 @@ void cpu_argmax_forward(tensor input, tensor& ref_output, int32_t dim) auto reduce_size = input_dims[dim]; auto output_numel = - std::accumulate(output_dims.begin(), output_dims.end(), 1L, std::multiplies()); + std::accumulate(output_dims.begin(), output_dims.end(), 1LL, std::multiplies()); auto inner_size = 1ULL; for(int32_t i = dim + 1; i < input_dims.size(); i++) diff --git a/test/cpu_sum.hpp b/test/cpu_sum.hpp index 8898a5654b..73d68eb9b8 100644 --- a/test/cpu_sum.hpp +++ b/test/cpu_sum.hpp @@ -39,7 +39,7 @@ void cpu_sum_forward(tensor input, auto reduce_size = input_dims[dim]; auto output_numel = - std::accumulate(output_dims.begin(), output_dims.end(), 1L, std::multiplies()); + std::accumulate(output_dims.begin(), output_dims.end(), 1LL, std::multiplies()); auto inner_size = 1ULL; for(int32_t i = dim + 1; i < input_dims.size(); i++) diff --git a/test/ctc.cpp b/test/ctc.cpp index 1c759220f2..4dfd452d50 100644 --- a/test/ctc.cpp +++ b/test/ctc.cpp @@ -776,7 +776,7 @@ struct ctc_driver : test_driver losses = tensor{lossesDims}; std::fill(losses.begin(), losses.end(), T(0)); - size_t labels_sz = std::accumulate(labelLengths.begin(), labelLengths.end(), 0); + size_t labels_sz = 
std::accumulate(labelLengths.begin(), labelLengths.end(), 0ULL); auto labels = std::vector(labels_sz); int blank_lb = ctcLossDesc.blank_label_id; diff --git a/test/include_inliner.cpp b/test/include_inliner.cpp index b4f1e4d5aa..62e6510d3f 100644 --- a/test/include_inliner.cpp +++ b/test/include_inliner.cpp @@ -37,24 +37,24 @@ namespace miopen { namespace tests { -static int Child(std::string_view cmd, const fs::path& path) -{ - return miopen::Process{cmd}("-source " + path); -} - class InlinerTest { + const TmpDir test_srcs{"test_include_inliner"}; + + int Child(const fs::path& exe, const fs::path& source) const + { + return test_srcs.Execute(exe.string(), "-source " + source); + } + public: void Run(const fs::path& exe_path) const { - const TmpDir test_srcs{"test_include_inliner"}; - const auto addkernels = - miopen::make_executable_name(exe_path.parent_path() / "addkernels").string(); + const auto addkernels = make_executable_name(exe_path.parent_path() / "addkernels"); const auto header_filename = "header.h"; - const auto asm_src = test_srcs.path / "valid.s"; - const auto valid_src = test_srcs.path / "valid.cl"; - const auto invalid_src = test_srcs.path / "invalid.cl"; - const auto header_src = test_srcs.path / header_filename; + const auto asm_src = test_srcs / "valid.s"; + const auto valid_src = test_srcs / "valid.cl"; + const auto invalid_src = test_srcs / "invalid.cl"; + const auto header_src = test_srcs / header_filename; // clang-format-off std::ofstream(valid_src.c_str()) << "#include <" << header_filename << ">\n" diff --git a/test/lstm_common.hpp b/test/lstm_common.hpp index d2b7d1a077..85d17dc138 100644 --- a/test/lstm_common.hpp +++ b/test/lstm_common.hpp @@ -465,7 +465,7 @@ struct verify_forward_infer_lstm : verify_forward_lstm { reserveSpaceSize /= sizeof(T); reserveSpaceSize -= - nLayers * std::accumulate(batch_seq.begin(), batch_seq.begin() + seqLength, 0) * + nLayers * std::accumulate(batch_seq.begin(), batch_seq.begin() + seqLength, 0ULL) * 
hiddenSize * bi; reserveSpaceSize *= 2; reserveSpaceSize *= sizeof(T); @@ -771,7 +771,7 @@ struct verify_forward_train_lstm : verify_forward_lstm outputCPPDescs, outputDescs, batch_seq, out_h, miopen::deref(rnnDesc).dataType); size_t inputBatchLenSum = - std::accumulate(batch_seq.begin(), batch_seq.begin() + seqLength, 0); + std::accumulate(batch_seq.begin(), batch_seq.begin() + seqLength, 0ULL); size_t reserveSpaceSize; reserveSpaceSize = 2 * 6 * miopen::deref(rnnDesc).nLayers * inputBatchLenSum * out_h; @@ -1075,7 +1075,8 @@ verify_backward_data_lstm::cpu() const std::vector dhx(initHidden.size()); std::vector dcx(initHidden.size()); - size_t inputBatchLenSum = std::accumulate(batch_seq.begin(), batch_seq.begin() + seqLength, 0); + size_t inputBatchLenSum = + std::accumulate(batch_seq.begin(), batch_seq.begin() + seqLength, 0ULL); size_t reserveSpaceSize; reserveSpaceSize = 2ULL * 6 * miopen::deref(rnnDesc).nLayers * inputBatchLenSum * hiddenSize * ((dirMode != 0) ? 2 : 1); @@ -1778,7 +1779,7 @@ struct lstm_basic_driver : test_driver std::vector rsvgpu(reserveSpaceSize, T(0)); size_t inputBatchLenSum = - std::accumulate(batchSeq.begin(), batchSeq.begin() + seqLength, 0); + std::accumulate(batchSeq.begin(), batchSeq.begin() + seqLength, 0ULL); reserveSpaceSize = 2ULL * 6 * numLayers * inputBatchLenSum * hiddenSize * ((dirMode != 0) ? 
2 : 1); if(useDropout != 0) diff --git a/test/pooling_common.hpp b/test/pooling_common.hpp index 33bcb7164f..231b635a63 100644 --- a/test/pooling_common.hpp +++ b/test/pooling_common.hpp @@ -283,7 +283,7 @@ struct verify_backward_pooling std::size_t mx_idx_dim = mx_idx; mx_idx_dim /= std::accumulate(in_dim.begin() + i + 3, in_dim.end(), - 1, + 1ULL, std::multiplies()); mx_idx_dim %= in_dim[i + 2]; idx[i + 2] = mx_idx_dim; diff --git a/test/rnn_util.hpp b/test/rnn_util.hpp index 680c52582f..eeeebcadf9 100644 --- a/test/rnn_util.hpp +++ b/test/rnn_util.hpp @@ -115,7 +115,7 @@ inline void createTensorDescArray(std::vector& td, inline std::tuple GetTempPackedBuffersSize(std::vector batchs, int in_vec, int out_vec) { - size_t total_batch = std::accumulate(batchs.begin(), batchs.end(), 0); + size_t total_batch = std::accumulate(batchs.begin(), batchs.end(), 0ULL); size_t in_buff_size = total_batch * in_vec; size_t out_buff_size = total_batch * out_vec; @@ -131,9 +131,12 @@ inline size_t getSuperTensorSize(const std::vector& bs, bool isInput, bool isPadded) { - return static_cast(isPadded ? seqLength * maxPaddingVal - : std::accumulate(bs.begin(), bs.end(), 0)) * - static_cast(isInput ? inputSize : hiddenSize * (isBidirect ? 2 : 1)); + return (isPadded // + ? static_cast(seqLength) * maxPaddingVal + : std::accumulate(bs.begin(), bs.end(), 0ULL)) // + * (isInput // + ? static_cast(inputSize) + : static_cast(hiddenSize) * (isBidirect ? 2 : 1)); } template diff --git a/test/tensor_holder.hpp b/test/tensor_holder.hpp index 3895dd9583..0e7ff7548f 100644 --- a/test/tensor_holder.hpp +++ b/test/tensor_holder.hpp @@ -183,7 +183,12 @@ struct tensor tensor(miopen::TensorDescriptor rhs) : desc(std::move(rhs)) { - assert(desc.GetType() == miopen_type{}); + assert(desc.GetType() == miopen_type{} + /// In the driver, T is input tensor type, but output tensor holders + /// are instantiated with T as well. 
This leads to false assertion + /// failures when T is INT8 because output type is different. + /// \todo Get rid of this hack when the driver is improved: + || (miopen_type{} == miopenInt8 && desc.GetType() == miopenInt32)); data.resize(desc.GetElementSpace()); } diff --git a/test/tensor_layout.hpp b/test/tensor_layout.hpp deleted file mode 100644 index 65d014d936..0000000000 --- a/test/tensor_layout.hpp +++ /dev/null @@ -1,80 +0,0 @@ -/******************************************************************************* - * - * MIT License - * - * Copyright (c) 2021 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - *******************************************************************************/ -#ifndef GUARD_TENSOR_LAYOUT_HPP -#define GUARD_TENSOR_LAYOUT_HPP - -#include -#include -#include -#include -#include -#include - -template -void tensor_layout_to_strides(const std::vector& len, - const std::string& len_layout, - const std::string& layout, - std::vector& strides) -{ - // Bind the layout and the dimension lengths together into a map. - std::map dim_to_len; - std::transform(len.begin(), - len.end(), - len_layout.begin(), - std::inserter(dim_to_len, dim_to_len.end()), - [](T l, char dim) { return std::make_pair(dim, l); }); - - // Now construct the strides according to layout by multiply the - // dimension lengths together. - std::transform(len_layout.begin(), - len_layout.end(), - std::back_inserter(strides), - [&layout, &dim_to_len](char cur_layout_char) { - auto pos = layout.find(cur_layout_char); - if(pos == std::string::npos) - { - MIOPEN_THROW(std::string("mismatched layout string, unexpect char: ") - .append(1, cur_layout_char)); - } - return std::accumulate(layout.begin() + pos + 1, - layout.end(), - 1, - [&dim_to_len](T accumulator, char l) { - return accumulator * dim_to_len[l]; - }); - }); -} - -inline std::string tensor_layout_get_default(int size) -{ - if(size == 4) - return "NCHW"; - if(size == 5) - return "NCDHW"; - return ""; -} - -#endif