From 309309b11a839de67196c26450ff7ffcac98af26 Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Sun, 5 Nov 2023 01:39:43 +0300
Subject: [PATCH 01/50] Update docker and miopen version

---
 CMakeLists.txt |  2 +-
 Dockerfile     | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fc1d2594e0..a8fc802244 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -104,7 +104,7 @@ if(NOT WIN32 AND NOT APPLE)
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
 endif()
 
-rocm_setup_version(VERSION 3.00.0)
+rocm_setup_version(VERSION 3.01.0)
 
 list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake )
 include(TargetFlags)
diff --git a/Dockerfile b/Dockerfile
index 4e4281958c..a97525c710 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:20.04 as miopen
+FROM ubuntu:22.04 as miopen
 ARG DEBIAN_FRONTEND=noninteractive
 
 # Support multiarch
@@ -18,17 +18,17 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
 ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
 RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg
 
-RUN wget https://repo.radeon.com/amdgpu-install/5.7.1/ubuntu/focal/amdgpu-install_5.7.50701-1_all.deb --no-check-certificate
+RUN wget https://repo.radeon.com/amdgpu-install/.6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
 RUN apt-get update && \
 DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
-    ./amdgpu-install_5.7.50701-1_all.deb
+    ./amdgpu-install_6.0.60000-1_all.deb
 
 # Add rocm repository
-RUN export ROCM_APT_VER=5.7.1;\
+RUN export ROCM_APT_VER=6.0;\
 echo $ROCM_APT_VER &&\
-sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\
-sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list'
-RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
+sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/.$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\
+sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/.apt_$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list'
+RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list"
 
 RUN amdgpu-install -y --usecase=rocm --no-dkms
 

From 61e17213490a047c283d6e1e5c8af06d226e1680 Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Sun, 5 Nov 2023 01:44:03 +0300
Subject: [PATCH 02/50] update FIN to develop

---
 fin | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fin b/fin
index ae2ff171f9..044f5e90c6 160000
--- a/fin
+++ b/fin
@@ -1 +1 @@
-Subproject commit ae2ff171f9803e2731092a1309ed71ffc18ec2c1
+Subproject commit 044f5e90c6ddb2184467e7029f39c2d7fba19d29

From 58c4d97cfcc75976ff2b867702cea6237288e57c Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Fri, 10 Nov 2023 17:56:32 -0800
Subject: [PATCH 03/50] update a few more requirements

---
 dev-requirements.txt | 2 +-
 requirements.txt     | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/dev-requirements.txt b/dev-requirements.txt
index 6efae8c0de..788514f780 100755
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,3 +1,3 @@
 ROCmSoftwarePlatform/rocm-recipes@d7b71f8ff71572833c8cf15b74279dd034e66f9d
 -f requirements.txt
-danmar/cppcheck@2.9
+danmar/cppcheck@2.12.1
diff --git a/requirements.txt b/requirements.txt
index d1864085e2..a42de6d4f5 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,5 @@
 sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On
 boost@1.83 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations "
-ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build
 ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH=/opt/conda/envs/py_3.9 -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda
 nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off
 ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0

From 39572c04540e8f9d0f874637522534ce88290a9a Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Sat, 11 Nov 2023 21:54:37 -0800
Subject: [PATCH 04/50] overrisde existing installed files

---
 Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index a97525c710..3363a59264 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -98,9 +98,9 @@ RUN tar zxvf /tmp/ccache.tar.gz -C /tmp/ && mkdir /tmp/ccache-${CCACHE_COMMIT}/b
 RUN ccache -s 
 ARG COMPILER_LAUNCHER=""
 RUN if [ "$USE_FIN" = "ON" ]; then \
-        rbuild prepare -s fin -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
+        sudo rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
     else \
-        rbuild prepare -s develop -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
+        sudo rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
     fi
 
 RUN ccache -s 

From 4a011d15d0f6cd54ffb37baf5cbc03b089968b54 Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Sun, 12 Nov 2023 00:13:15 -0800
Subject: [PATCH 05/50] purge CK before installing new

---
 Dockerfile | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 3363a59264..d7e313227e 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -96,11 +96,17 @@ RUN tar zxvf /tmp/ccache.tar.gz -C /tmp/ && mkdir /tmp/ccache-${CCACHE_COMMIT}/b
     cd /tmp/ccache-${CCACHE_COMMIT}/build && \
     cmake -DZSTD_FROM_INTERNET=ON -DHIREDIS_FROM_INTERNET=ON .. && make -j install && rm -rf /tmp/*
 RUN ccache -s 
+
+# purge existing composable kernel installed with ROCm
+# hence cannot use autoremove since it will remove more components
+RUN apt-get update && \
+DEBIAN_FRONTEND=noninteractive apt-get purge -y --allow-unauthenticated \
+    composablekernel-dev
 ARG COMPILER_LAUNCHER=""
 RUN if [ "$USE_FIN" = "ON" ]; then \
-        sudo rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
+        rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
     else \
-        sudo rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
+        rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
     fi
 
 RUN ccache -s 

From 58f8d80b1a2ffd6ed9d98b9d48f44726ca5c8d9c Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Sun, 12 Nov 2023 20:51:43 -0800
Subject: [PATCH 06/50] fix hip tidy issues

---
 CMakeLists.txt    | 12 ++++++++++++
 src/rnn_api.cpp   |  2 +-
 test/na_train.cpp |  2 +-
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a8fc802244..ff24ae4a0e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -765,6 +765,18 @@ enable_cppcheck(
         knownConditionTrueFalse
         shadowFunction
         moduloofone
+        ###################################################################
+        # TODO Code Quality WORKAROUND ROCm 6.0 &&
+        # Ubuntu 22.04 && cppcheck 2.12.1 update
+        ###################################################################
+        duplInheritedMember
+        constParameterReference
+        constParameterPointer
+        constVariableReference
+        constVariablePointer
+        useStlAlgorithm
+        uselessOverride
+        unusedScopedObject   
     FORCE
     SOURCES
         addkernels/
diff --git a/src/rnn_api.cpp b/src/rnn_api.cpp
index 8dce8f364e..e04dc5c2ea 100644
--- a/src/rnn_api.cpp
+++ b/src/rnn_api.cpp
@@ -529,7 +529,7 @@ static void LogCmdRNN(const miopenTensorDescriptor_t* xDesc,
                       const int seqLength,
                       const RNNDir_t dir)
 {
-    if(miopen::IsLoggingCmd())
+    if(miopen::IsLoggingCmd() && seqLength > 0)
     {
         std::string mode;
         miopenRNNMode_t rnnMode = miopen::deref(rnnDesc).rnnMode;
diff --git a/test/na_train.cpp b/test/na_train.cpp
index 24529058ed..80f3fc781f 100644
--- a/test/na_train.cpp
+++ b/test/na_train.cpp
@@ -802,7 +802,7 @@ struct na_fusion_driver : test_driver
 
         std::size_t input_n, input_c, input_h, input_w;
         std::tie(input_n, input_c, input_h, input_w) = miopen::tien<4>(input.desc.GetLengths());
-        this->tolerance                              = 80 * float(input.desc.GetElementSize());
+        this->tolerance                              = 80 * double(input.desc.GetElementSize());
         ptr_activdesc                                = GetManagedActivDesc();
         miopenSetActivationDescriptor(ptr_activdesc.get(), activ_mode, alpha, beta, gamma);
         auto&& handle = get_handle();

From 7eefad5d2899fedf6085577f52a8441252276e74 Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Mon, 13 Nov 2023 08:37:53 -0800
Subject: [PATCH 07/50] add a few missing ones

---
 CMakeLists.txt      | 1 +
 src/convolution.cpp | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ff24ae4a0e..8356eb7da2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -770,6 +770,7 @@ enable_cppcheck(
         # Ubuntu 22.04 && cppcheck 2.12.1 update
         ###################################################################
         duplInheritedMember
+        constParameterCallback
         constParameterReference
         constParameterPointer
         constVariableReference
diff --git a/src/convolution.cpp b/src/convolution.cpp
index d0f9a64ffb..dbbe03eda3 100644
--- a/src/convolution.cpp
+++ b/src/convolution.cpp
@@ -289,7 +289,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor&
         }
     }
 
-    std::size_t out_c;
+    std::size_t out_c = 0;
     std::vector<std::size_t> out_lens(spatial_dim + 2);
 
     auto out_spatial = boost::adaptors::slice(out_lens, 2, 2 + spatial_dim);

From ed1a7326ec49c886093359e17f820d89531406a4 Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Mon, 13 Nov 2023 08:38:54 -0800
Subject: [PATCH 08/50] adopt review opinion

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index a42de6d4f5..be5f50c5d8 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On
 boost@1.83 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations "
+# ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build
 ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH=/opt/conda/envs/py_3.9 -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda
 nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off
 ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0

From 8053a0cdee07b3a15f783bfb436f9f3631a5d8bf Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Mon, 13 Nov 2023 23:04:00 -0800
Subject: [PATCH 09/50] Update CMakeLists.txt

Co-authored-by: JD <jahandad@gmail.com>
---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8356eb7da2..468bcfbfeb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -104,7 +104,7 @@ if(NOT WIN32 AND NOT APPLE)
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
 endif()
 
-rocm_setup_version(VERSION 3.01.0)
+rocm_setup_version(VERSION 3.1.0)
 
 list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake )
 include(TargetFlags)

From f958b469798aa814c1220feabb92455687bfa556 Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Tue, 14 Nov 2023 12:04:53 -0800
Subject: [PATCH 10/50] fix issues typo and hiprtc header

---
 docs/DebugAndLogging.md             | 2 +-
 src/kernels/MIOpenCheckNumerics.cpp | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/docs/DebugAndLogging.md b/docs/DebugAndLogging.md
index 3ae5db123a..b1e497efcc 100644
--- a/docs/DebugAndLogging.md
+++ b/docs/DebugAndLogging.md
@@ -94,7 +94,7 @@ Direct Solutions:
 * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11` - `ConvOclDirectFwd11x11`.
 * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN` - `ConvOclDirectFwdGen`.
 * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD` - `ConvOclDirectFwd`.
-* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd`.
+* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd1x1`.
 * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2` - `ConvOclBwdWrW2<n>` (where n = `{1,2,4,8,16}`), and `ConvOclBwdWrW2NonTunable`.
 * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53` - `ConvOclBwdWrW53`.
 * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1` - `ConvOclBwdWrW1x1`
diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp
index 827f4d1397..915f76a3d2 100644
--- a/src/kernels/MIOpenCheckNumerics.cpp
+++ b/src/kernels/MIOpenCheckNumerics.cpp
@@ -26,6 +26,7 @@
 #ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
 #include <hip/hip_fp16.h>
 #include <hip/hip_runtime.h>
+#include <limits> // std::numeric_limits
 #endif
 
 #include <hip/hip_bfloat16.h>
@@ -70,8 +71,6 @@ using conditional_t = typename conditional<predicate, X, Y>::type;
 #endif
 #endif // __HIPCC_RTC__
 
-#include <limits> // std::numeric_limits
-
 #define MIOPEN_ENABLE_F8_DEVICE_CODE 1
 #include "hip_float8.hpp"
 

From 699cf08415bba38fc91c47dfae4f12df3eca70a5 Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Thu, 16 Nov 2023 17:21:38 -0800
Subject: [PATCH 11/50] Keep base docker at Ubuntu 20.04

---
 Dockerfile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index d7e313227e..e3f9a89338 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:22.04 as miopen
+FROM ubuntu:20.04 as miopen
 ARG DEBIAN_FRONTEND=noninteractive
 
 # Support multiarch
@@ -18,7 +18,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
 ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
 RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg
 
-RUN wget https://repo.radeon.com/amdgpu-install/.6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
+RUN wget https://repo.radeon.com/amdgpu-install/.6.0/ubuntu/focal/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
 RUN apt-get update && \
 DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
     ./amdgpu-install_6.0.60000-1_all.deb
@@ -26,9 +26,9 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
 # Add rocm repository
 RUN export ROCM_APT_VER=6.0;\
 echo $ROCM_APT_VER &&\
-sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/.$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\
-sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/.apt_$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list'
-RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list"
+sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/.$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\
+sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/.apt_$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list'
+RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
 
 RUN amdgpu-install -y --usecase=rocm --no-dkms
 

From 3e847dad39ff5f9e524ace83bf2187633109d87b Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Fri, 17 Nov 2023 13:29:22 -0800
Subject: [PATCH 12/50] Revert limitations on limits header

---
 src/kernels/MIOpenCheckNumerics.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp
index 915f76a3d2..827f4d1397 100644
--- a/src/kernels/MIOpenCheckNumerics.cpp
+++ b/src/kernels/MIOpenCheckNumerics.cpp
@@ -26,7 +26,6 @@
 #ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
 #include <hip/hip_fp16.h>
 #include <hip/hip_runtime.h>
-#include <limits> // std::numeric_limits
 #endif
 
 #include <hip/hip_bfloat16.h>
@@ -71,6 +70,8 @@ using conditional_t = typename conditional<predicate, X, Y>::type;
 #endif
 #endif // __HIPCC_RTC__
 
+#include <limits> // std::numeric_limits
+
 #define MIOPEN_ENABLE_F8_DEVICE_CODE 1
 #include "hip_float8.hpp"
 

From 72eb1800aa5be8819a4270ad4f396e483c56165b Mon Sep 17 00:00:00 2001
From: Umang Yadav <umayadav@amd.com>
Date: Tue, 5 Dec 2023 16:38:00 +0000
Subject: [PATCH 13/50] add fixes

---
 .../gpu_reference_kernel/fp8_naive_conv.cpp   | 25 ++++++++-
 src/kernels/hip_float8.hpp                    | 56 ++++++++++++++-----
 2 files changed, 64 insertions(+), 17 deletions(-)

diff --git a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
index 3b4eabecfb..d294c9e182 100644
--- a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
+++ b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
@@ -63,15 +63,36 @@ struct conditional<false, X, Y>
 
 template <bool predicate, typename X, typename Y>
 using conditional_t = typename conditional<predicate, X, Y>::type;
+
+template <class T, T V>
+struct integral_constant
+{
+    static constexpr T value = V;
+    using value_type         = T;
+    using type               = integral_constant;
+    constexpr operator value_type() const noexcept { return value; }
+    constexpr value_type operator()() const noexcept { return value; }
+    static constexpr type to() { return {}; }
+};
+
+template <bool B>
+using bool_constant = integral_constant<bool, B>;
+
+template <class T, class U>
+struct is_same : bool_constant<__is_same(T, U)>
+{
+}
+
 } // namespace std
+
 #else
 #include <cstdint> // int8_t, int16_t
 #include <cmath>   // float_t
 #endif
+#else              // __HIPCC_RTC__
+#include <limits>
 #endif // __HIPCC_RTC__
 
-#include <limits> // std::numeric_limits
-
 #define MIOPEN_ENABLE_F8_DEVICE_CODE 1
 #include "hip_float8.hpp"
 
diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp
index a9b2a559a8..2947d6d713 100644
--- a/src/kernels/hip_float8.hpp
+++ b/src/kernels/hip_float8.hpp
@@ -83,6 +83,9 @@ inline MIOPEN_HIP_HOST_DEVICE bool get_hip_f8_bias_mode()
 #endif
 }
 
+template <typename T>
+class numeric_limits;
+
 template <hip_f8_type T>
 struct hip_f8
 {
@@ -262,8 +265,7 @@ struct hip_f8
 
     inline MIOPEN_HIP_HOST_DEVICE bool operator==(const hip_f8& rhs) const
     {
-        if((rhs.is_zero() && this->is_zero()) ||
-           (fabs(rhs - *this) < std::numeric_limits<hip_f8<T>>::epsilon()))
+        if((rhs.is_zero() && this->is_zero()) || (this->data == rhs.data))
         {
             return true;
         }
@@ -487,19 +489,6 @@ MIOPEN_HIP_HOST_DEVICE T F8_Max()
     x.bits = 0x7F;
     return x.value;
 }
-} // namespace miopen_f8
-
-// define numeric limits for the new data type
-namespace std {
-inline bool isfinite(miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8> x) // NOLINT
-{
-    return x.is_inf();
-}
-
-inline bool isfinite(miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8> x) // NOLINT
-{
-    return x.is_inf();
-}
 
 template <>
 class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>
@@ -555,7 +544,44 @@ class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>
     }
 };
 
+} // namespace miopen_f8
+
+#ifndef __HIPCC_RTC__
+namespace std {
+inline bool isfinite(miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8> x) // NOLINT
+{
+    return x.is_inf();
+}
+
+inline bool isfinite(miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8> x) // NOLINT
+{
+    return x.is_inf();
+}
+
+inline bool isnan(miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8> x) // NOLINT
+{
+    return x.is_nan();
+}
+
+inline bool isnan(miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8> x) // NOLINT
+{
+    return x.is_nan();
+}
+
+template <>
+class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>
+    : public miopen_f8::numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>
+{
+};
+
+template <>
+class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>
+    : public miopen_f8::numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>
+{
+};
+
 } // namespace std
+#endif
 
 template <miopen_f8::hip_f8_type T>
 struct hip_f8x4

From 10e95817ac1334ea765b755f52f55c615794d60f Mon Sep 17 00:00:00 2001
From: Umang Yadav <umayadav@amd.com>
Date: Tue, 5 Dec 2023 18:16:52 +0000
Subject: [PATCH 14/50] changes that works for MIGraphX

---
 src/kernels/MIOpenCheckNumerics.cpp           |  4 ++--
 .../gpu_reference_kernel/fp8_naive_conv.cpp   | 19 -------------------
 src/kernels/hip_f8_impl.hpp                   |  8 ++++----
 3 files changed, 6 insertions(+), 25 deletions(-)

diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp
index 827f4d1397..8a363e4eb0 100644
--- a/src/kernels/MIOpenCheckNumerics.cpp
+++ b/src/kernels/MIOpenCheckNumerics.cpp
@@ -68,9 +68,9 @@ using conditional_t = typename conditional<predicate, X, Y>::type;
 #include <cstdint> // int8_t, int16_t
 #include <cmath>   // float_t
 #endif
-#endif // __HIPCC_RTC__
-
+#else              // __HIPCC_RTC__
 #include <limits> // std::numeric_limits
+#endif // __HIPCC_RTC__
 
 #define MIOPEN_ENABLE_F8_DEVICE_CODE 1
 #include "hip_float8.hpp"
diff --git a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
index d294c9e182..ed70941e6f 100644
--- a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
+++ b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
@@ -64,25 +64,6 @@ struct conditional<false, X, Y>
 template <bool predicate, typename X, typename Y>
 using conditional_t = typename conditional<predicate, X, Y>::type;
 
-template <class T, T V>
-struct integral_constant
-{
-    static constexpr T value = V;
-    using value_type         = T;
-    using type               = integral_constant;
-    constexpr operator value_type() const noexcept { return value; }
-    constexpr value_type operator()() const noexcept { return value; }
-    static constexpr type to() { return {}; }
-};
-
-template <bool B>
-using bool_constant = integral_constant<bool, B>;
-
-template <class T, class U>
-struct is_same : bool_constant<__is_same(T, U)>
-{
-}
-
 } // namespace std
 
 #else
diff --git a/src/kernels/hip_f8_impl.hpp b/src/kernels/hip_f8_impl.hpp
index c7a62f9f72..45edeffc4b 100644
--- a/src/kernels/hip_f8_impl.hpp
+++ b/src/kernels/hip_f8_impl.hpp
@@ -87,8 +87,8 @@ MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8_no_range_reduce(T _x,
 template <int wm, int we, typename T, bool negative_zero_nan, bool clip>
 MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8(T _x, bool stoch, uint32_t rng)
 {
-    constexpr bool is_half  = std::is_same<T, half>::value;
-    constexpr bool is_float = std::is_same<T, float>::value;
+    constexpr bool is_half  = __is_same_as(T, half);
+    constexpr bool is_float = __is_same_as(T, float);
     static_assert(wm + we == 7, "wm+we==7");
     static_assert(is_half || is_float, "Only half and float can be cast to f8");
 
@@ -272,8 +272,8 @@ MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8(T _x, bool stoch, uint32_t rng)
 template <int wm, int we, typename T, bool negative_zero_nan>
 MIOPEN_HIP_HOST_DEVICE T cast_from_f8(uint8_t x)
 {
-    constexpr bool is_half  = std::is_same<T, half>::value;
-    constexpr bool is_float = std::is_same<T, float>::value;
+    constexpr bool is_half  = __is_same_as(T, half);
+    constexpr bool is_float = __is_same_as(T, float);
     static_assert(is_half || is_float, "only half and float are supported");
 
     constexpr int weo = is_half ? 5 : 8;

From f498789403bea40ce45cc74d34719bdf99b18e64 Mon Sep 17 00:00:00 2001
From: Umang Yadav <umayadav@amd.com>
Date: Tue, 5 Dec 2023 18:37:22 +0000
Subject: [PATCH 15/50] rever changes for checknumerics

---
 src/kernels/MIOpenCheckNumerics.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp
index 8a363e4eb0..827f4d1397 100644
--- a/src/kernels/MIOpenCheckNumerics.cpp
+++ b/src/kernels/MIOpenCheckNumerics.cpp
@@ -68,10 +68,10 @@ using conditional_t = typename conditional<predicate, X, Y>::type;
 #include <cstdint> // int8_t, int16_t
 #include <cmath>   // float_t
 #endif
-#else              // __HIPCC_RTC__
-#include <limits> // std::numeric_limits
 #endif // __HIPCC_RTC__
 
+#include <limits> // std::numeric_limits
+
 #define MIOPEN_ENABLE_F8_DEVICE_CODE 1
 #include "hip_float8.hpp"
 

From 69c0d99f3249086a505ddf434027efec738dfa93 Mon Sep 17 00:00:00 2001
From: Umang Yadav <umayadav@amd.com>
Date: Tue, 5 Dec 2023 20:12:12 +0000
Subject: [PATCH 16/50] Formatting

---
 src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
index ed70941e6f..f24a2d8813 100644
--- a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
+++ b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
@@ -70,7 +70,7 @@ using conditional_t = typename conditional<predicate, X, Y>::type;
 #include <cstdint> // int8_t, int16_t
 #include <cmath>   // float_t
 #endif
-#else              // __HIPCC_RTC__
+#else // __HIPCC_RTC__
 #include <limits>
 #endif // __HIPCC_RTC__
 

From cf02a64bc0daecd6f95e1c193fd91654cb8dc48e Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Tue, 5 Dec 2023 15:41:00 -0800
Subject: [PATCH 17/50] update docker file

---
 Dockerfile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 9ad2df53f1..c0dbb7b111 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:20.04 as miopen
+FROM ubuntu:22.04 as miopen
 ARG DEBIAN_FRONTEND=noninteractive
 
 # Support multiarch
@@ -18,7 +18,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
 ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
 RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg
 
-RUN wget https://repo.radeon.com/amdgpu-install/.6.0/ubuntu/focal/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
+RUN wget https://repo.radeon.com/amdgpu-install/.6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
 RUN apt-get update && \
 DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
     ./amdgpu-install_6.0.60000-1_all.deb
@@ -26,9 +26,9 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
 # Add rocm repository
 RUN export ROCM_APT_VER=6.0;\
 echo $ROCM_APT_VER &&\
-sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/.$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\
-sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/.apt_$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list'
-RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
+sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/.$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\
+sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/.apt_$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list'
+RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list"
 
 RUN amdgpu-install -y --usecase=rocm --no-dkms
 

From ebeef1db7c3e8abbc7eefc897ecdedcc0d27ccfe Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Tue, 5 Dec 2023 16:08:47 -0800
Subject: [PATCH 18/50] avoid ldd conflicts

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 75cb1abe21..e636fdd4ab 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On
 boost@1.83 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations "
 # ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build
-ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH=/opt/conda/envs/py_3.9 -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda
+ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda
 nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off
 ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0
 ROCmSoftwarePlatform/eigen@3.4.0

From ee34b45dc09368eda9e6d319d85e57cbe5db80c1 Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Tue, 5 Dec 2023 22:09:37 -0800
Subject: [PATCH 19/50] update CK commit hash

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index e636fdd4ab..4237ba78ad 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,4 @@ nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off
 ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0
 ROCmSoftwarePlatform/eigen@3.4.0
 ROCmSoftwarePlatform/frugally-deep@9683d557eb672ee2304f80f6682c51242d748a50
-ROCmSoftwarePlatform/composable_kernel@df467969684505876ef3a95fef94b77645836494 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON
+ROCmSoftwarePlatform/composable_kernel@5e17095fea33e8de1a0b549a3ad705699318b8be -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON

From c6b435284bd6d88012e6d0aa3a4241f003321118 Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Fri, 15 Dec 2023 14:37:59 -0800
Subject: [PATCH 20/50] update dockerfile

---
 Dockerfile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index c0dbb7b111..0d47e02dfd 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -18,16 +18,16 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
 ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
 RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg
 
-RUN wget https://repo.radeon.com/amdgpu-install/.6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
+RUN wget https://repo.radeon.com/amdgpu-install/6.0.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
 RUN apt-get update && \
 DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
     ./amdgpu-install_6.0.60000-1_all.deb
 
 # Add rocm repository
-RUN export ROCM_APT_VER=6.0;\
+RUN export ROCM_APT_VER=6.0.0;\
 echo $ROCM_APT_VER &&\
-sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/.$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\
-sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/.apt_$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list'
+sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\
+sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list'
 RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list"
 
 RUN amdgpu-install -y --usecase=rocm --no-dkms

From 9990a5dcb706af720ca715bba37534c245da098d Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Sat, 16 Dec 2023 11:31:18 -0800
Subject: [PATCH 21/50] update dockerfile

---
 Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 0d47e02dfd..fde9eafd61 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -18,13 +18,13 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
 ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
 RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg
 
-RUN wget https://repo.radeon.com/amdgpu-install/6.0.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
+RUN wget https://repo.radeon.com/amdgpu-install/6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
 RUN apt-get update && \
 DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
     ./amdgpu-install_6.0.60000-1_all.deb
 
 # Add rocm repository
-RUN export ROCM_APT_VER=6.0.0;\
+RUN export ROCM_APT_VER=6.0;\
 echo $ROCM_APT_VER &&\
 sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\
 sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list'

From 0133956352f8f2b041ed68d018bdbe46e8ba6940 Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Sat, 16 Dec 2023 19:34:13 -0800
Subject: [PATCH 22/50] workaround build issues of CK

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 06983fb0d1..f58fec2b49 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,4 @@ nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off
 ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0
 ROCmSoftwarePlatform/eigen@3.4.0
 ROCmSoftwarePlatform/frugally-deep@9683d557eb672ee2304f80f6682c51242d748a50
-ROCmSoftwarePlatform/composable_kernel@0dacd895d5ba9c9eeb99588ec7f7df1da82f7fa9 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON
+ROCmSoftwarePlatform/composable_kernel@0dacd895d5ba9c9eeb99588ec7f7df1da82f7fa9 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON -DCMAKE_CXX_FLAGS=" -Wno-unused-parameter "

From dc55bba6330d6173703b4e732cfaf1d199620aac Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Sat, 16 Dec 2023 20:50:10 -0800
Subject: [PATCH 23/50] fix the real issue in compiling rocMLIR

---
 requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index f58fec2b49..506eb6803d 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,9 @@
 sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On
 boost@1.83 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations "
 # ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build
-ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda
+ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda -DCMAKE_CXX_FLAGS=" -Wno-unused-parameter "
 nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off
 ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0
 ROCmSoftwarePlatform/eigen@3.4.0
 ROCmSoftwarePlatform/frugally-deep@9683d557eb672ee2304f80f6682c51242d748a50
-ROCmSoftwarePlatform/composable_kernel@0dacd895d5ba9c9eeb99588ec7f7df1da82f7fa9 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON -DCMAKE_CXX_FLAGS=" -Wno-unused-parameter "
+ROCmSoftwarePlatform/composable_kernel@0dacd895d5ba9c9eeb99588ec7f7df1da82f7fa9 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON

From 0d48eb3bf9cf52fc402fa63387f51ea477e998bc Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Sat, 16 Dec 2023 23:59:40 -0800
Subject: [PATCH 24/50] bump CK commit hash

---
 requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 506eb6803d..2c323504f8 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,9 @@
 sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On
 boost@1.83 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations "
 # ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build
-ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda -DCMAKE_CXX_FLAGS=" -Wno-unused-parameter "
+ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda
 nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off
 ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0
 ROCmSoftwarePlatform/eigen@3.4.0
 ROCmSoftwarePlatform/frugally-deep@9683d557eb672ee2304f80f6682c51242d748a50
-ROCmSoftwarePlatform/composable_kernel@0dacd895d5ba9c9eeb99588ec7f7df1da82f7fa9 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON
+ROCmSoftwarePlatform/composable_kernel@55a89c746eb6cf7973c47fb9b2635e0f73bd2fc2 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON

From 19b1a954295250250969bffcc9a76f562f835f02 Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Sun, 17 Dec 2023 12:41:55 -0800
Subject: [PATCH 25/50] WA for Issue 2600 and turn on smoke tests by default

---
 Jenkinsfile          | 6 +++---
 test/handle_test.cpp | 6 +++++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 7e07d3deba..b1bbb916dd 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -436,15 +436,15 @@ pipeline {
             description: "")
         booleanParam(
             name: "BUILD_SMOKE_FP32",
-            defaultValue: env.BRANCH_NAME == env.NIGHTLY_BRANCH ? true : false,
+            defaultValue: true, // env.BRANCH_NAME == env.NIGHTLY_BRANCH ? true : false,
             description: "")
         booleanParam(
             name: "BUILD_SMOKE_AUX1",
-            defaultValue: env.BRANCH_NAME == env.NIGHTLY_BRANCH ? true : false,
+            defaultValue: true, // env.BRANCH_NAME == env.NIGHTLY_BRANCH ? true : false,
             description: "")
         booleanParam(
             name: "BUILD_SMOKE_FP16_BF16_INT8",
-            defaultValue: env.BRANCH_NAME == env.NIGHTLY_BRANCH ? true : false,
+            defaultValue: true, // env.BRANCH_NAME == env.NIGHTLY_BRANCH ? true : false,
             description: "")
         booleanParam(
             name: "BUILD_FULL_TESTS",
diff --git a/test/handle_test.cpp b/test/handle_test.cpp
index 2548a7ad4b..d2620c579a 100644
--- a/test/handle_test.cpp
+++ b/test/handle_test.cpp
@@ -28,6 +28,10 @@
 /// \todo Create dedicated ticket and rename macro.
 #define WORKAROUND_SWDEV_257056_PCH_MISSING_MACROS 1
 
+// https://gerrit-git.amd.com/c/compute/ec/clr/+/972441
+#define WORKAROUND_ISSUE_2600 \
+    (HIP_PACKAGE_VERSION_FLAT > 5007023384ULL && HIP_PACKAGE_VERSION_FLAT <= 6000023494ULL)
+
 #include <miopen/config.h>
 #include <miopen/handle.hpp>
 #include <miopen/execution_context.hpp>
@@ -231,7 +235,7 @@ std::string WriteNop(kernel_type_t kern_type)
 void test_warnings(kernel_type_t kern_type)
 {
     auto&& h = get_handle();
-#if MIOPEN_BUILD_DEV
+#if MIOPEN_BUILD_DEV && !WORKAROUND_ISSUE_2600
     if(kern_type == miopenOpenCLKernelType)
     {
         EXPECT(throws([&] {

From 592d5cba905611c6f3bf17369a52d165f7811f57 Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Mon, 18 Dec 2023 22:06:23 -0800
Subject: [PATCH 26/50] ROCm 6.0 replaces all __HIP_PLATFORM_HCC__ with
 __HIP_PLATFORM_AMD__

---
 src/CMakeLists.txt                            |  2 +-
 src/comgr.cpp                                 |  2 +-
 src/composable_kernel/.clang-tidy             |  2 +-
 src/composable_kernel/cmake/ClangTidy.cmake   |  2 +-
 .../external/rocm/include/bfloat16_dev.hpp    | 10 +++---
 src/kernels/bfloat16_dev.hpp                  | 10 +++---
 src/kernels/float_types.h                     | 36 +++++++++----------
 src/kernels/hip_f8_impl.hpp                   |  2 +-
 src/kernels/hip_float8.hpp                    |  2 +-
 9 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1d5548db7e..df06e9785a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -770,7 +770,7 @@ elseif(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP")
     endif()
     if(ENABLE_HIP_WORKAROUNDS)
         # Workaround hip not setting its usage requirements correctly
-        target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_HCC__=1 )
+        target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_AMD__=1 )
     endif()
     # This is helpful for the tests
     target_link_libraries( MIOpen INTERFACE $<BUILD_INTERFACE:hip::device> )
diff --git a/src/comgr.cpp b/src/comgr.cpp
index 4040881e09..2c1b82560c 100644
--- a/src/comgr.cpp
+++ b/src/comgr.cpp
@@ -1293,7 +1293,7 @@ void BuildHip(const std::string& name,
             miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit());
         compiler::lc::RemoveOptionsUnwanted(opts);
         opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073
-        opts.push_back("-D__HIP_PLATFORM_HCC__=1");            // Workaround?
+        opts.push_back("-D__HIP_PLATFORM_AMD__=1");            // Workaround?
         opts.push_back("-D__HIP_PLATFORM_AMD__=1");            // Workaround?
 #if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT
         if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name()))
diff --git a/src/composable_kernel/.clang-tidy b/src/composable_kernel/.clang-tidy
index 5c2b781687..8d0880abcf 100644
--- a/src/composable_kernel/.clang-tidy
+++ b/src/composable_kernel/.clang-tidy
@@ -1,3 +1,3 @@
 CheckOptions:
   - key: bugprone-reserved-identifier.AllowedIdentifiers
-    value: '__HIP_PLATFORM_HCC__;__HIP_ROCclr__'
+    value: '__HIP_PLATFORM_AMD__;__HIP_ROCclr__'
diff --git a/src/composable_kernel/cmake/ClangTidy.cmake b/src/composable_kernel/cmake/ClangTidy.cmake
index 8de726de09..04ec12c326 100644
--- a/src/composable_kernel/cmake/ClangTidy.cmake
+++ b/src/composable_kernel/cmake/ClangTidy.cmake
@@ -149,7 +149,7 @@ function(clang_tidy_check TARGET)
             add_custom_target(${tidy_target}
                 # for some targets clang-tidy not able to get information from .clang-tidy
                 DEPENDS ${SOURCE}
-                COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_HCC__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml"
+                COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_AMD__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml"
                 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                 COMMENT "clang-tidy: Running clang-tidy on target ${SOURCE}..."
             )
diff --git a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp
index f5fa35adfb..26d8645d61 100644
--- a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp
+++ b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp
@@ -30,7 +30,7 @@
 extern "C" {
 #endif
 
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define EXECUTION_SPECIFIER __device__
 #else
 #define EXECUTION_SPECIFIER
@@ -43,7 +43,7 @@ typedef union
 
 // Composable kernels are written in HIP language. The language doesnt support
 // ushort2.hi or ushort2.low.
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
     ushort ushortvec[2];
 #endif // MIOPEN_BACKEND_HIP
     float f32;
@@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val)
 {
     cvt_bf16_fp32_t target_val;
 
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
     target_val.ushortx2 = make_ushort2(0, src_val);
 #else
     target_val.ushortx2 = (ushort2)(0, src_val);
@@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
 // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
 // incrementing it causes it to become an exponent of 0xFF and a mantissa
 // of 0x00, which is Inf, the next higher value to the unrounded value.
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
         target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1));
 #else
         target_val.u32 +=
@@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
 #endif // MIOPEN_USE_RNE_BFLOAT16
     }
 
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
     return target_val.ushortvec[1];
 #else
     return target_val.ushortx2.hi;
diff --git a/src/kernels/bfloat16_dev.hpp b/src/kernels/bfloat16_dev.hpp
index f5f24baa81..4b85a95975 100644
--- a/src/kernels/bfloat16_dev.hpp
+++ b/src/kernels/bfloat16_dev.hpp
@@ -30,7 +30,7 @@
 extern "C" {
 #endif
 
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define EXECUTION_SPECIFIER __device__
 #else
 #define EXECUTION_SPECIFIER
@@ -43,7 +43,7 @@ typedef union cvt_bf16_fp32
 
 // Composable kernels are written in HIP language. The language doesnt support
 // ushort2.hi or ushort2.low.
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
     ushort ushortvec[2];
 #endif // MIOPEN_BACKEND_HIP
     float f32;
@@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val)
 {
     cvt_bf16_fp32_t target_val;
 
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
     target_val.ushortx2 = make_ushort2(0, src_val);
 #else
     target_val.ushortx2 = (ushort2)(0, src_val);
@@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
 // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
 // incrementing it causes it to become an exponent of 0xFF and a mantissa
 // of 0x00, which is Inf, the next higher value to the unrounded value.
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
         target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1));
 #else
         target_val.u32 +=
@@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
 #endif // MIOPEN_USE_RNE_BFLOAT16
     }
 
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
     return target_val.ushortvec[1];
 #else
     return target_val.ushortx2.hi;
diff --git a/src/kernels/float_types.h b/src/kernels/float_types.h
index 5406ba85ec..beded11d8d 100644
--- a/src/kernels/float_types.h
+++ b/src/kernels/float_types.h
@@ -34,7 +34,7 @@
 #define FOUR 4
 #define EIGHT 8
 #if MIOPEN_USE_FP8 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT hip_f8<miopen_f8::hip_f8_type::fp8>
 #define FLOAT_ACCUM float
 // HIP implements the correct operators for conversion
@@ -58,7 +58,7 @@
 #endif // MIOPEN_USE_FP8
 
 #if MIOPEN_USE_BFP8 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT hip_f8<miopen_f8::hip_f8_type::bf8>
 #define FLOAT_ACCUM float
 #else
@@ -79,7 +79,7 @@
 // #endif
 #endif // MIOPEN_USE_BFP8
 
-#ifndef __HIP_PLATFORM_HCC__
+#ifndef __HIP_PLATFORM_AMD__
 #define _FLOAT2 PPCAT(_FLOAT, TWO)
 #define _FLOAT4 PPCAT(_FLOAT, FOUR)
 #define _FLOAT8 PPCAT(_FLOAT, EIGHT)
@@ -99,19 +99,19 @@
 #endif
 
 #if MIOPEN_USE_DOUBLE_ACCUM
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT_ACCUM double
 #else
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 #define _FLOAT_ACCUM double
-#endif // __HIP_PLATFORM_HCC__
+#endif // __HIP_PLATFORM_AMD__
 #define MAX_VAL_ACCUM DBL_MAX
 #else // MIOPEN_USE_DOUBLE_ACCUM
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT_ACCUM float
 #else
 #define _FLOAT_ACCUM float
-#endif // __HIP_PLATFORM_HCC__
+#endif // __HIP_PLATFORM_AMD__
 #ifndef FLT_MAX
 #define MAX_VAL_ACCUM 3.402823466e+38F
 #else
@@ -120,12 +120,12 @@
 #endif // MIOPEN_USE_DOUBLE_ACCUM
 
 #if MIOPEN_USE_FP16 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT _Float16
-#else // __HIP_PLATFORM_HCC__
+#else // __HIP_PLATFORM_AMD__
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 #define _FLOAT half
-#endif // __HIP_PLATFORM_HCC__
+#endif // __HIP_PLATFORM_AMD__
 #define SIZEOF_FLOAT 2
 // Max value for the main datatype
 #ifndef HALF_MAX
@@ -136,11 +136,11 @@
 #endif // MIOPEN_USE_FP16
 
 #if MIOPEN_USE_FP32 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT float
 #else
 #define _FLOAT float
-#endif // __HIP_PLATFORM_HCC__
+#endif // __HIP_PLATFORM_AMD__
 #define SIZEOF_FLOAT 4
 // Max value for the main datatype
 #ifndef FLT_MAX
@@ -151,7 +151,7 @@
 #endif // MIOPEN_USE_FP32
 
 #if MIOPEN_USE_BFP16 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define FLOAT ushort
 #else
 #define _FLOAT ushort
@@ -162,7 +162,7 @@
 #endif // MIOPEN_USE_BFP16
 
 #if MIOPEN_USE_FP16 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define CVT_FLOAT2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
 #define CVT_ACCUM2FLOAT(x) (static_cast<FLOAT>(x))
 #define CVT_INTEGRAL2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
@@ -188,7 +188,7 @@
 /// refactoring should be considered as nontrivial and requires
 /// a separate PR. Let's keep this historical stuff for now.
 /// --atamazov 30.08.2023
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define CVT_FLOAT2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
 #define CVT_ACCUM2FLOAT(x) (static_cast<FLOAT>(x))
 #define CVT_INTEGRAL2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
@@ -202,7 +202,7 @@
 #endif // MIOPEN_USE_FP32
 
 #if MIOPEN_USE_BFP16 == 1
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define CVT_FLOAT2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
 #define CVT_ACCUM2FLOAT(x) MIOPEN_ERROR_NOT_IMLEMENTED
 #define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
@@ -232,7 +232,7 @@
 #endif
 
 #if MIOPEN_USE_NATIVE_DATATYPE_ACCUM
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #undef FLOAT_ACCUM
 #define FLOAT_ACCUM MIOPEN_ERROR_NOT_IMLEMENTED
 #else
@@ -250,7 +250,7 @@
 #define CVT_FP32_2ACCUM(x) (CVT_FP32_2FLOAT(x))
 
 #undef CVT_INTEGRAL2ACCUM
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
 #define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
 #else
 #if MIOPEN_USE_BFP16 == 1
diff --git a/src/kernels/hip_f8_impl.hpp b/src/kernels/hip_f8_impl.hpp
index c8d49cd474..8ff8255ceb 100644
--- a/src/kernels/hip_f8_impl.hpp
+++ b/src/kernels/hip_f8_impl.hpp
@@ -27,7 +27,7 @@
 // #include <half.hpp>
 namespace miopen_hip_f8_impl {
 
-#ifndef __HIP_PLATFORM_HCC__
+#ifndef __HIP_PLATFORM_AMD__
 using hip_bfloat16 = bfloat16;
 using half         = half_float::half;
 #endif
diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp
index 2947d6d713..d7ec875d17 100644
--- a/src/kernels/hip_float8.hpp
+++ b/src/kernels/hip_float8.hpp
@@ -29,7 +29,7 @@
 #endif
 
 // FP8 header version 0.4, 2021/05/11
-#if defined __HIP_PLATFORM_HCC__ && MIOPEN_ENABLE_F8_DEVICE_CODE
+#if defined __HIP_PLATFORM_AMD__ && MIOPEN_ENABLE_F8_DEVICE_CODE
 // MIOpen by default does not have device code in the regular compilation paths,
 // therefore, when this file is used from the host side, compilation takes much
 // longer. By guarding the __device__ directive we can control that such compilation

From f92cd4c415bcf5e3901b8bb1ba5ff515745ad700 Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Tue, 19 Dec 2023 09:48:45 -0800
Subject: [PATCH 27/50] Update src/comgr.cpp

Co-authored-by: Artem Tamazov <artem.tamazov@gmail.com>
---
 src/comgr.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/comgr.cpp b/src/comgr.cpp
index 2c1b82560c..6d71daba26 100644
--- a/src/comgr.cpp
+++ b/src/comgr.cpp
@@ -1293,7 +1293,9 @@ void BuildHip(const std::string& name,
             miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit());
         compiler::lc::RemoveOptionsUnwanted(opts);
         opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073
-        opts.push_back("-D__HIP_PLATFORM_AMD__=1");            // Workaround?
+#if HIP_PACKAGE_VERSION_FLAT < 6000023494ULL
+        opts.push_back("-D__HIP_PLATFORM_HCC__=1");            // Workaround?
+#endif
         opts.push_back("-D__HIP_PLATFORM_AMD__=1");            // Workaround?
 #if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT
         if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name()))

From b81d3a5bb97f84bd946e8e6dd1dddd22b9231dd7 Mon Sep 17 00:00:00 2001
From: Jun Liu <Liu.Jun@amd.com>
Date: Tue, 19 Dec 2023 10:15:11 -0800
Subject: [PATCH 28/50] fix clang format issue

---
 src/comgr.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/comgr.cpp b/src/comgr.cpp
index 6d71daba26..08c61efbc7 100644
--- a/src/comgr.cpp
+++ b/src/comgr.cpp
@@ -1294,9 +1294,9 @@ void BuildHip(const std::string& name,
         compiler::lc::RemoveOptionsUnwanted(opts);
         opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073
 #if HIP_PACKAGE_VERSION_FLAT < 6000023494ULL
-        opts.push_back("-D__HIP_PLATFORM_HCC__=1");            // Workaround?
+        opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround?
 #endif
-        opts.push_back("-D__HIP_PLATFORM_AMD__=1");            // Workaround?
+        opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround?
 #if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT
         if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name()))
             opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1");

From ee4020ed68e3986d42b6252e6c6f4cfc0b4ee907 Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Wed, 20 Dec 2023 01:56:57 +0300
Subject: [PATCH 29/50] fix-hiprtc-60(01) Add and use miopen_cstdint.hpp

---
 src/CMakeLists.txt                            | 23 ++++++-----
 src/comgr.cpp                                 |  6 +--
 .../batched_transpose.cpp                     |  2 +
 .../gpu_reference_kernel/naive_conv.cpp       | 17 +++-----
 src/kernels/hip_float8.hpp                    |  3 ++
 src/kernels/miopen_cstdint.hpp                | 40 +++++++++++++++++++
 6 files changed, 65 insertions(+), 26 deletions(-)
 create mode 100644 src/kernels/miopen_cstdint.hpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index bed3f18121..df7fffd0ac 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -379,23 +379,24 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN
         kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f3x2_stride1.inc
         kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f3x2_stride2.inc
         kernels/Conv_Winograd_v30_3_1_metadata.inc
-        kernels/xform_bidirect_winograd_code.inc
-        kernels/rocm_version.inc
-        kernels/inst_wrappers.inc
+        kernels/bfloat16_dev.hpp
         kernels/conv_common.inc
-        kernels/utilities.inc
-        kernels/xform_data_filter.inc
-        kernels/xform_kd_cov2.inc
-        kernels/xform_metadata.inc
-        kernels/neuron.inc
         kernels/conv_sizes.inc
-        kernels/gpr_alloc.inc
-        kernels/bfloat16_dev.hpp
         kernels/float_types.h
-        kernels/workaround_issue_1431.hpp
+        kernels/gpr_alloc.inc
         kernels/hip_f8_impl.hpp
         kernels/hip_float8.hpp
+        kernels/inst_wrappers.inc
+        kernels/miopen_cstdint.hpp
+        kernels/neuron.inc
+        kernels/rocm_version.inc
         kernels/stride_array.hpp
+        kernels/utilities.inc
+        kernels/workaround_issue_1431.hpp
+        kernels/xform_bidirect_winograd_code.inc
+        kernels/xform_data_filter.inc
+        kernels/xform_kd_cov2.inc
+        kernels/xform_metadata.inc
         )
 
     set(MIOPEN_KERNELS
diff --git a/src/comgr.cpp b/src/comgr.cpp
index 4040881e09..3fd658514f 100644
--- a/src/comgr.cpp
+++ b/src/comgr.cpp
@@ -1292,9 +1292,9 @@ void BuildHip(const std::string& name,
         auto opts =
             miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit());
         compiler::lc::RemoveOptionsUnwanted(opts);
-        opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073
-        opts.push_back("-D__HIP_PLATFORM_HCC__=1");            // Workaround?
-        opts.push_back("-D__HIP_PLATFORM_AMD__=1");            // Workaround?
+        opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE=1"); // Workaround for SWDEV-308073
+        opts.push_back("-D__HIP_PLATFORM_HCC__=1");              // Workaround?
+        opts.push_back("-D__HIP_PLATFORM_AMD__=1");              // Workaround?
 #if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT
         if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name()))
             opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1");
diff --git a/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp b/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp
index d1217c0f69..587b0b4191 100644
--- a/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp
+++ b/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp
@@ -28,6 +28,8 @@
 #include <hip/hip_fp16.h>
 #endif
 
+#include "miopen_cstdint.hpp"
+
 #ifndef BATCHED_TRANSPOSE_OCCUPANCY
 #define BATCHED_TRANSPOSE_OCCUPANCY 4
 #endif
diff --git a/src/kernels/gpu_reference_kernel/naive_conv.cpp b/src/kernels/gpu_reference_kernel/naive_conv.cpp
index 125eff94f3..ef6f9afc4b 100644
--- a/src/kernels/gpu_reference_kernel/naive_conv.cpp
+++ b/src/kernels/gpu_reference_kernel/naive_conv.cpp
@@ -28,23 +28,16 @@
 #include <hip/hip_runtime.h>
 #endif
 
-#ifdef __HIPCC_RTC__
 #ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// Definitions from <cstdint>, <cmath> conflict with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h.
-
-typedef signed char int8_t;
-typedef signed short int16_t;
 typedef float float_t;
-#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
-#include <limits> // std::numeric_limits
+#else
+#include <cmath> // float_t
 #endif
+#include "miopen_cstdint.hpp"
 
-#else
-#include <cstdint> // int8_t, int16_t
-#include <cmath>   // float_t
+#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
+#include <limits> // std::numeric_limits
 #endif
-#endif // __HIPCC_RTC__
 
 #include "stride_array.hpp"
 
diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp
index 2947d6d713..7814decae6 100644
--- a/src/kernels/hip_float8.hpp
+++ b/src/kernels/hip_float8.hpp
@@ -24,6 +24,9 @@
  *
  *******************************************************************************/
 #pragma once
+
+#include "miopen_cstdint.hpp"
+
 #ifndef MIOPEN_ENABLE_F8_DEVICE_CODE
 #define MIOPEN_ENABLE_F8_DEVICE_CODE 0
 #endif
diff --git a/src/kernels/miopen_cstdint.hpp b/src/kernels/miopen_cstdint.hpp
new file mode 100644
index 0000000000..adcc9edf6d
--- /dev/null
+++ b/src/kernels/miopen_cstdint.hpp
@@ -0,0 +1,40 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#pragma once
+
+#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
+/// Definitions from <cstdint>, <cmath> conflict with
+/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h.
+typedef signed char int8_t;
+typedef signed short int16_t;
+#if HIP_PACKAGE_VERSION_FLAT >= 6000023494ULL
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+#endif
+
+#else
+#include <cstdint> // int8_t, int16_t
+#endif

From f669dca8c2a945a8c2eeeb50a5d65a7434f86378 Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Wed, 20 Dec 2023 01:58:04 +0300
Subject: [PATCH 30/50] fix-hiprtc-60(02) [tests] Improve logging of testing
 parameters during CMake phase

---
 test/CMakeLists.txt       | 5 +++++
 test/gtest/CMakeLists.txt | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 9940bfe1ff..5f4c1e3521 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -176,6 +176,10 @@ message(STATUS "MIOPEN_TEST_GPU_XNACK_ENABLED ${MIOPEN_TEST_GPU_XNACK_ENABLED}")
 message(STATUS "MIOPEN_TEST_GPU_DETECTION_FAILED ${MIOPEN_TEST_GPU_DETECTION_FAILED}")
 message(STATUS "MIOPEN_TEST_WITH_MIOPENDRIVER ${MIOPEN_TEST_WITH_MIOPENDRIVER}")
 message(STATUS "MIOPEN_TEST_MLIR ${MIOPEN_TEST_MLIR}")
+message(STATUS "MIOPEN_TEST_CONV ${MIOPEN_TEST_CONV}")
+message(STATUS "MIOPEN_TEST_DEEPBENCH ${MIOPEN_TEST_DEEPBENCH}")
+message(STATUS "MIOPEN_TEST_DRIVER_ITER_MODE ${MIOPEN_TEST_DRIVER_ITER_MODE}")
+message(STATUS "MIOPEN_TEST_COMPOSABLEKERNEL ${MIOPEN_TEST_COMPOSABLEKERNEL}")
 
 if(MIOPEN_TEST_DRIVER_ITER_MODE)
     add_definitions(-DMIOPEN_TEST_DRIVER_MODE=2)
@@ -234,6 +238,7 @@ message(STATUS "MIOPEN_TEST_FLOAT ${MIOPEN_TEST_FLOAT}")
 message(STATUS "MIOPEN_TEST_HALF ${MIOPEN_TEST_HALF}")
 message(STATUS "MIOPEN_TEST_BFLOAT16 ${MIOPEN_TEST_BFLOAT16}")
 message(STATUS "MIOPEN_TEST_INT8 ${MIOPEN_TEST_INT8}")
+message(STATUS "MIOPEN_TEST_ALL ${MIOPEN_TEST_ALL}")
 
 set_var_to_condition(WORKAROUND_ISSUE_1187_DEFAULT MIOPEN_TEST_GFX90A AND MIOPEN_TEST_FLOAT)
 option( WORKAROUND_ISSUE_1187 "" ${WORKAROUND_ISSUE_1187_DEFAULT})
diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt
index 2e80fb5eac..634841da3b 100644
--- a/test/gtest/CMakeLists.txt
+++ b/test/gtest/CMakeLists.txt
@@ -50,3 +50,8 @@ foreach(TEST ${TESTS})
     get_filename_component(BASE_NAME ${TEST} NAME_WE)
     add_gtest(${BASE_NAME})
 endforeach()
+
+message(STATUS "gtest env: MIOPEN_USER_DB_PATH=${CMAKE_CURRENT_BINARY_DIR}")
+message(STATUS "gtest env: MIOPEN_TEST_FLOAT_ARG=${MIOPEN_TEST_FLOAT_ARG}")
+message(STATUS "gtest env: MIOPEN_TEST_ALL=${MIOPEN_TEST_ALL}")
+message(STATUS "gtest env: MIOPEN_TEST_COMPOSABLEKERNEL=${MIOPEN_TEST_COMPOSABLEKERNEL}")

From 5756538e495b28ed955af50aca027e760e3524ba Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Wed, 20 Dec 2023 02:20:05 +0300
Subject: [PATCH 31/50] fix-hiprtc-60(04) [test_handle_test] Expect 5.7.x

---
 test/handle_test.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/handle_test.cpp b/test/handle_test.cpp
index ade06c5447..4467e410f5 100644
--- a/test/handle_test.cpp
+++ b/test/handle_test.cpp
@@ -30,7 +30,7 @@
 
 // https://gerrit-git.amd.com/c/compute/ec/clr/+/972441
 #define WORKAROUND_ISSUE_2600 \
-    (HIP_PACKAGE_VERSION_FLAT > 5007023384ULL && HIP_PACKAGE_VERSION_FLAT <= 6000023494ULL)
+    (HIP_PACKAGE_VERSION_FLAT > 6000000000ULL && HIP_PACKAGE_VERSION_FLAT <= 6000023494ULL)
 
 #include <miopen/config.h>
 #include <miopen/handle.hpp>

From c81a2373c56ef2ec78521efabba92ee918f26d9c Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Wed, 20 Dec 2023 21:43:30 +0300
Subject: [PATCH 32/50] fix-hiprtc-60(05) Add and use miopen_type_traits.hpp.
 Use miopen_cstdint.hpp in some kernels.

---
 src/CMakeLists.txt                            |   1 +
 .../include/utility/config.hpp                |   1 +
 .../include/utility/enable_if.hpp             |   2 +
 .../include/utility/magic_division.hpp        |   1 +
 .../include/utility/type.hpp                  |  79 +---------
 .../general_tensor_reorder_kernel_util.hpp    |  19 +--
 src/kernels/miopen_cstdint.hpp                |   1 +
 src/kernels/miopen_type_traits.hpp            | 135 ++++++++++++++++++
 .../utility/static_kernel_ck_utils_type.hpp   |  61 +-------
 9 files changed, 147 insertions(+), 153 deletions(-)
 create mode 100644 src/kernels/miopen_type_traits.hpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 69c55d75db..85c915df72 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -388,6 +388,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN
         kernels/hip_float8.hpp
         kernels/inst_wrappers.inc
         kernels/miopen_cstdint.hpp
+        kernels/miopen_type_traits.hpp
         kernels/neuron.inc
         kernels/rocm_version.inc
         kernels/stride_array.hpp
diff --git a/src/composable_kernel/composable_kernel/include/utility/config.hpp b/src/composable_kernel/composable_kernel/include/utility/config.hpp
index 92307214f4..7869a075f2 100644
--- a/src/composable_kernel/composable_kernel/include/utility/config.hpp
+++ b/src/composable_kernel/composable_kernel/include/utility/config.hpp
@@ -6,6 +6,7 @@
 #include "hip/hip_fp16.h"
 #endif
 #include "bfloat16_dev.hpp"
+#include "miopen_cstdint.hpp"
 
 // "Constant" address space for kernel parameter
 #define CONSTANT __attribute__((address_space(4)))
diff --git a/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp b/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp
index 501e1bfc1c..30494214f8 100644
--- a/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp
+++ b/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp
@@ -1,6 +1,8 @@
 #ifndef CK_ENABLE_IF_HPP
 #define CK_ENABLE_IF_HPP
 
+#include "miopen_type_traits.hpp"
+
 namespace ck {
 
 template <bool B, typename T = void>
diff --git a/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp b/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp
index b7489016e9..174c697501 100644
--- a/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp
+++ b/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp
@@ -6,6 +6,7 @@
 #include "number.hpp"
 #include "type.hpp"
 #include "tuple.hpp"
+#include "miopen_cstdint.hpp"
 
 namespace ck {
 
diff --git a/src/composable_kernel/composable_kernel/include/utility/type.hpp b/src/composable_kernel/composable_kernel/include/utility/type.hpp
index 4e5d4e5134..17769d52c0 100644
--- a/src/composable_kernel/composable_kernel/include/utility/type.hpp
+++ b/src/composable_kernel/composable_kernel/include/utility/type.hpp
@@ -3,84 +3,7 @@
 
 #include "integral_constant.hpp"
 #include "enable_if.hpp"
-
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// We need <type_traits> for std::remove_reference and std::remove_cv.
-/// But <type_traits> also defines std::true_type, per Standard.
-/// However the latter definition conflicts with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h,
-/// which defines std::true_type as well (which is wrong).
-
-namespace std {
-
-template <class T>
-struct remove_reference
-{
-    typedef T type;
-};
-template <class T>
-struct remove_reference<T&>
-{
-    typedef T type;
-};
-template <class T>
-struct remove_reference<T&&>
-{
-    typedef T type;
-};
-
-template <class T>
-using remove_reference_t = typename remove_reference<T>::type;
-
-template <class T>
-struct remove_const
-{
-    typedef T type;
-};
-template <class T>
-struct remove_const<const T>
-{
-    typedef T type;
-};
-
-template <class T>
-struct remove_volatile
-{
-    typedef T type;
-};
-template <class T>
-struct remove_volatile<volatile T>
-{
-    typedef T type;
-};
-
-template <class T>
-struct remove_cv
-{
-    typedef typename remove_volatile<typename remove_const<T>::type>::type type;
-};
-
-template <class T>
-struct is_pointer_helper : std::false_type
-{
-};
-
-template <class T>
-struct is_pointer_helper<T*> : std::true_type
-{
-};
-
-template <class T>
-struct is_pointer : is_pointer_helper<typename std::remove_cv<T>::type>
-{
-};
-
-} // namespace std
-#else
-#include <type_traits> // std::remove_reference, std::remove_cv, is_pointer
-#endif
-#endif // __HIPCC_RTC__
+#include "miopen_type_traits.hpp"
 
 namespace ck {
 
diff --git a/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp b/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp
index c88fa3cbc6..a6e917c855 100644
--- a/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp
+++ b/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp
@@ -24,34 +24,23 @@
  *
  *******************************************************************************/
 #ifndef GENERAL_TENSOR_REORDER_UTIL_HPP
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// Definitions from <cstdint>, <cmath> conflict with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h.
-
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef unsigned int uint32_t;
-
-#else
-#include <cstdint> // int8_t, int16_t
-#endif
-#endif // __HIPCC_RTC__
+#define GENERAL_TENSOR_REORDER_UTIL_HPP
 
 #ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
 #include <hip/hip_runtime.h>
 #include <hip/hip_fp16.h>
 #endif
 
+#include "miopen_cstdint.hpp"
+
 #ifndef TENSOR_REORDER_OCCUPANCY
 #define TENSOR_REORDER_OCCUPANCY 4
 #endif
-#define GENERAL_TENSOR_REORDER_UTIL_HPP
 
 template <int... Is>
 struct order
 {
-    static constexpr std::size_t m_size = sizeof...(Is);
+    static constexpr size_t m_size = sizeof...(Is);
     // the last dummy element is to prevent compiler complain about empty array, when mSize = 0
     static constexpr int m_data[m_size + 1] = {Is..., 0};
 
diff --git a/src/kernels/miopen_cstdint.hpp b/src/kernels/miopen_cstdint.hpp
index adcc9edf6d..ab237c805f 100644
--- a/src/kernels/miopen_cstdint.hpp
+++ b/src/kernels/miopen_cstdint.hpp
@@ -33,6 +33,7 @@ typedef signed short int16_t;
 #if HIP_PACKAGE_VERSION_FLAT >= 6000023494ULL
 typedef signed int int32_t;
 typedef unsigned int uint32_t;
+typedef __hip_internal::uint64_t uint64_t;
 #endif
 
 #else
diff --git a/src/kernels/miopen_type_traits.hpp b/src/kernels/miopen_type_traits.hpp
new file mode 100644
index 0000000000..7dbef7c31a
--- /dev/null
+++ b/src/kernels/miopen_type_traits.hpp
@@ -0,0 +1,135 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#pragma once
+
+#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
+/// We need <type_traits> for std::remove_reference and std::remove_cv.
+/// But <type_traits> also defines std::true_type, per Standard.
+/// However the latter definition conflicts with
+/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h,
+/// which defines std::true_type as well (which is wrong).
+
+namespace std {
+
+template <class T>
+struct remove_reference
+{
+    typedef T type;
+};
+template <class T>
+struct remove_reference<T&>
+{
+    typedef T type;
+};
+template <class T>
+struct remove_reference<T&&>
+{
+    typedef T type;
+};
+
+template <class T>
+using remove_reference_t = typename remove_reference<T>::type;
+
+template <class T>
+struct remove_const
+{
+    typedef T type;
+};
+template <class T>
+struct remove_const<const T>
+{
+    typedef T type;
+};
+
+template <class T>
+struct remove_volatile
+{
+    typedef T type;
+};
+template <class T>
+struct remove_volatile<volatile T>
+{
+    typedef T type;
+};
+
+template <class T>
+struct remove_cv
+{
+    typedef typename remove_volatile<typename remove_const<T>::type>::type type;
+};
+
+#if HIP_PACKAGE_VERSION_FLAT >= 6000023494ULL
+template <class T, T v>
+struct integral_constant
+{
+    static constexpr T value = v;
+    using value_type         = T;
+    using type               = integral_constant;
+    constexpr operator value_type() const noexcept { return value; }
+    constexpr value_type operator()() const noexcept { return value; }
+};
+
+using true_type  = integral_constant<bool, true>;
+using false_type = integral_constant<bool, false>;
+
+template <class T, class U>
+struct is_same : false_type
+{
+};
+
+template <class T>
+struct is_same<T, T> : true_type
+{
+};
+
+template <bool B, typename T = void>
+using enable_if = __hip_internal::enable_if<B, T>;
+
+template <bool B, typename T = void>
+using enable_if_t = typename __hip_internal::enable_if<B, T>::type;
+#endif
+
+template <class T>
+struct is_pointer_helper : false_type
+{
+};
+
+template <class T>
+struct is_pointer_helper<T*> : true_type
+{
+};
+
+template <class T>
+struct is_pointer : is_pointer_helper<typename remove_cv<T>::type>
+{
+};
+
+} // namespace std
+#else
+
+#include <type_traits> // std::remove_reference, std::remove_cv, is_pointer
+
+#endif
diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp
index 19cf75624b..88036f8f4d 100644
--- a/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp
+++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp
@@ -2,66 +2,7 @@
 #define CK_UTILS_TYPE_HPP
 
 #include "static_kernel_integral_constant.hpp"
-
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// We need <type_traits> for std::remove_reference and std::remove_cv.
-/// But <type_traits> also defines std::true_type, per Standard.
-/// However the latter definition conflicts with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h,
-/// which defines std::true_type as well (which is wrong).
-
-namespace std {
-
-template <class T>
-struct remove_reference
-{
-    typedef T type;
-};
-template <class T>
-struct remove_reference<T&>
-{
-    typedef T type;
-};
-template <class T>
-struct remove_reference<T&&>
-{
-    typedef T type;
-};
-
-template <class T>
-struct remove_const
-{
-    typedef T type;
-};
-template <class T>
-struct remove_const<const T>
-{
-    typedef T type;
-};
-
-template <class T>
-struct remove_volatile
-{
-    typedef T type;
-};
-template <class T>
-struct remove_volatile<volatile T>
-{
-    typedef T type;
-};
-
-template <class T>
-struct remove_cv
-{
-    typedef typename remove_volatile<typename remove_const<T>::type>::type type;
-};
-
-} // namespace std
-#else
-#include <type_traits> // std::remove_reference, std::remove_cv
-#endif
-#endif // __HIPCC_RTC__
+#include "miopen_type_traits.hpp"
 
 namespace ck {
 

From 50740d065a6ebb649a2bee84b89ef26930bc180d Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Thu, 21 Dec 2023 23:36:58 +0300
Subject: [PATCH 33/50] fix-hiprtc-60(06) Add and use miopen_utility.hpp
 instead of <utility>

---
 src/CMakeLists.txt                            |  1 +
 .../include/utility/array.hpp                 | 29 +----------
 src/kernels/miopen_utility.hpp                | 52 +++++++++++++++++++
 .../include/utility/static_kernel_tuple.hpp   | 29 +----------
 4 files changed, 55 insertions(+), 56 deletions(-)
 create mode 100644 src/kernels/miopen_utility.hpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 85c915df72..ef8504f8b7 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -389,6 +389,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN
         kernels/inst_wrappers.inc
         kernels/miopen_cstdint.hpp
         kernels/miopen_type_traits.hpp
+        kernels/miopen_utility.hpp
         kernels/neuron.inc
         kernels/rocm_version.inc
         kernels/stride_array.hpp
diff --git a/src/composable_kernel/composable_kernel/include/utility/array.hpp b/src/composable_kernel/composable_kernel/include/utility/array.hpp
index 23623560f7..63a912015c 100644
--- a/src/composable_kernel/composable_kernel/include/utility/array.hpp
+++ b/src/composable_kernel/composable_kernel/include/utility/array.hpp
@@ -4,34 +4,7 @@
 #include "functional2.hpp"
 #include "sequence.hpp"
 
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// We need <utility> for std::forward. In some cases, it includes <type_traits>
-/// (this is against the Standard, but it doesn't matter in this case).
-/// But <type_traits> also defines std::true_type, per Standard.
-/// However the latter definition conflicts with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h,
-/// which defines std::true_type as well (which is wrong).
-
-namespace std {
-
-template <typename T>
-constexpr T&& forward(typename remove_reference<T>::type& t_) noexcept
-{
-    return static_cast<T&&>(t_);
-}
-
-template <typename T>
-constexpr T&& forward(typename remove_reference<T>::type&& t_) noexcept
-{
-    return static_cast<T&&>(t_);
-}
-
-} // namespace std
-#else
-#include <utility> // std::forward
-#endif
-#endif // __HIPCC_RTC__
+#include "miopen_utility.hpp" // std::forward
 
 namespace ck {
 
diff --git a/src/kernels/miopen_utility.hpp b/src/kernels/miopen_utility.hpp
new file mode 100644
index 0000000000..db74017a9a
--- /dev/null
+++ b/src/kernels/miopen_utility.hpp
@@ -0,0 +1,52 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#pragma once
+
+#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
+
+#include "miopen_type_traits.hpp" // std::remove_reference
+
+namespace std {
+
+template <typename T>
+constexpr T&& forward(typename remove_reference<T>::type& t_) noexcept
+{
+    return static_cast<T&&>(t_);
+}
+
+template <typename T>
+constexpr T&& forward(typename remove_reference<T>::type&& t_) noexcept
+{
+    return static_cast<T&&>(t_);
+}
+
+} // namespace std
+
+#else
+
+#include <utility>
+
+#endif
diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp
index 1118550815..343b9d388d 100644
--- a/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp
+++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp
@@ -5,34 +5,7 @@
 #include "static_kernel_ck_utils_type.hpp"
 #include "static_kernel_sequence.hpp"
 
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// We need <utility> for std::forward. In some cases, it includes <type_traits>
-/// (this is against the Standard, but it doesn't matter in this case).
-/// But <type_traits> also defines std::true_type, per Standard.
-/// However the latter definition conflicts with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h,
-/// which defines std::true_type as well (which is wrong).
-
-namespace std {
-
-template <typename T>
-constexpr T&& forward(typename remove_reference<T>::type& t_) noexcept
-{
-    return static_cast<T&&>(t_);
-}
-
-template <typename T>
-constexpr T&& forward(typename remove_reference<T>::type&& t_) noexcept
-{
-    return static_cast<T&&>(t_);
-}
-
-} // namespace std
-#else
-#include <utility> // std::forward
-#endif
-#endif // __HIPCC_RTC__
+#include "miopen_utility.hpp" // std::forward
 
 namespace ck {
 

From ae88a45a82aa47950de9e524a8b6d3a873624b92 Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Thu, 21 Dec 2023 23:40:07 +0300
Subject: [PATCH 34/50] fix-hiprtc-60(07) [CMake] Log HIPRTC version

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 12c5d3f8e1..de712c5f55 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -381,7 +381,7 @@ if(MIOPEN_USE_HIPRTC)
         message(FATAL_ERROR "HIPRTC can be used only together with COMGR")
     endif()
     find_package(hiprtc REQUIRED)
-    message(STATUS "Build with HIPRTC")
+    message(STATUS "Build with HIPRTC ${hiprtc_VERSION}")
 endif()
 
 option(Boost_USE_STATIC_LIBS "Use boost static libraries" ON)

From 4a3b7ce253843bf973a6dac117a4748a51fbfa3c Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Thu, 21 Dec 2023 23:42:55 +0300
Subject: [PATCH 35/50] fix-hiprtc-60(08) [HIPRTC] Do not define
 __HIP_PLATFORM_HCC__ starting from HIP 6.0.0.

---
 src/comgr.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/comgr.cpp b/src/comgr.cpp
index 57ad815a11..5d4c71ec57 100644
--- a/src/comgr.cpp
+++ b/src/comgr.cpp
@@ -1293,7 +1293,7 @@ void BuildHip(const std::string& name,
             miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit());
         compiler::lc::RemoveOptionsUnwanted(opts);
         opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE=1"); // Workaround for SWDEV-308073
-#if HIP_PACKAGE_VERSION_FLAT < 6000023494ULL
+#if HIP_PACKAGE_VERSION_MAJOR < 6
         opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround?
 #endif
         opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround?

From d0045b23702540553b946c0d06dc578074212f0f Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Thu, 21 Dec 2023 23:49:25 +0300
Subject: [PATCH 36/50] fix-hiprtc-60(09) [HIPRTC][PCH] More uses of
 miopen_type_traits.hpp and miopen_cstdint.hpp. Got rid of <cmath>. Added todo
 about <limits>

---
 .../include/utility/data_type.hpp             | 21 ++-------
 src/kernels/MIOpenCheckNumerics.cpp           | 43 +----------------
 .../gpu_reference_kernel/fp8_naive_conv.cpp   | 46 ++-----------------
 .../gpu_reference_kernel/naive_conv.cpp       |  5 --
 src/kernels/hip_f8_impl.hpp                   |  4 ++
 src/kernels/stride_array.hpp                  |  6 ---
 6 files changed, 16 insertions(+), 109 deletions(-)

diff --git a/src/composable_kernel/composable_kernel/include/utility/data_type.hpp b/src/composable_kernel/composable_kernel/include/utility/data_type.hpp
index 4d21f91e6a..470c08fc00 100644
--- a/src/composable_kernel/composable_kernel/include/utility/data_type.hpp
+++ b/src/composable_kernel/composable_kernel/include/utility/data_type.hpp
@@ -3,21 +3,10 @@
 
 #include "statically_indexed_array.hpp"
 
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// Definitions from <cstdint>, <cmath> conflict with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h.
-
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef float float_t;
-#include <limits> // std::numeric_limits
-
-#else
-#include <cstdint> // int8_t, int16_t
-#include <cmath>   // float_t
-#endif
-#endif // __HIPCC_RTC__
+#include "miopen_cstdint.hpp"
+
+/// \todo miopen_limits.hpp
+#include <limits>
 
 namespace ck {
 
@@ -978,7 +967,7 @@ struct inner_product_with_conversion
         return acc;
     }
 
-    __device__ T operator()(float_t a, float_t b) const { return convert(a) * convert(b); }
+    __device__ T operator()(float a, float b) const { return convert(a) * convert(b); }
 
     __device__ T operator()(int8x4_t a, int8x4_t b) const
     {
diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp
index 827f4d1397..ae67fcb941 100644
--- a/src/kernels/MIOpenCheckNumerics.cpp
+++ b/src/kernels/MIOpenCheckNumerics.cpp
@@ -30,47 +30,8 @@
 
 #include <hip/hip_bfloat16.h>
 
-// Copied over from naive_conv.cpp
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// Definitions from <cstdint>, <cmath> conflict with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h.
-
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef unsigned short uint16_t;
-typedef float float_t;
-
-// std::conditional requires type_traits which has a few other things
-// which result in collision with amd_hip_vector_types.h
-
-namespace std {
-template <bool predicate, typename X, typename Y>
-struct conditional;
-
-template <typename X, typename Y>
-struct conditional<true, X, Y>
-{
-    using type = X;
-};
-
-template <typename X, typename Y>
-struct conditional<false, X, Y>
-{
-    using type = Y;
-};
-
-template <bool predicate, typename X, typename Y>
-using conditional_t = typename conditional<predicate, X, Y>::type;
-} // namespace std
-#else
-#include <cstdint> // int8_t, int16_t
-#include <cmath>   // float_t
-#endif
-#endif // __HIPCC_RTC__
-
-#include <limits> // std::numeric_limits
+/// \todo miopen_limits.hpp
+#include <limits>
 
 #define MIOPEN_ENABLE_F8_DEVICE_CODE 1
 #include "hip_float8.hpp"
diff --git a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
index f24a2d8813..a818311442 100644
--- a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
+++ b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
@@ -30,49 +30,13 @@
 #include <hip/hip_bfloat16.h>
 #endif
 
-// Copied over from naive_conv.cpp
-#ifdef __HIPCC_RTC__
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// Definitions from <cstdint>, <cmath> conflict with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h.
+#include "miopen_cstdint.hpp"
+#include "miopen_type_traits.hpp"
 
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-typedef signed short int16_t;
-typedef unsigned short uint16_t;
-typedef float float_t;
-
-// std::conditional requires type_traits which has a few other things
-// which result in collition with amd_hip_vector_types.h
-
-namespace std {
-template <bool predicate, typename X, typename Y>
-struct conditional;
-
-template <typename X, typename Y>
-struct conditional<true, X, Y>
-{
-    using type = X;
-};
-
-template <typename X, typename Y>
-struct conditional<false, X, Y>
-{
-    using type = Y;
-};
-
-template <bool predicate, typename X, typename Y>
-using conditional_t = typename conditional<predicate, X, Y>::type;
-
-} // namespace std
-
-#else
-#include <cstdint> // int8_t, int16_t
-#include <cmath>   // float_t
-#endif
-#else // __HIPCC_RTC__
+/// \todo miopen_limits.hpp
+#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
 #include <limits>
-#endif // __HIPCC_RTC__
+#endif
 
 #define MIOPEN_ENABLE_F8_DEVICE_CODE 1
 #include "hip_float8.hpp"
diff --git a/src/kernels/gpu_reference_kernel/naive_conv.cpp b/src/kernels/gpu_reference_kernel/naive_conv.cpp
index ef6f9afc4b..37eb824c8d 100644
--- a/src/kernels/gpu_reference_kernel/naive_conv.cpp
+++ b/src/kernels/gpu_reference_kernel/naive_conv.cpp
@@ -28,11 +28,6 @@
 #include <hip/hip_runtime.h>
 #endif
 
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-typedef float float_t;
-#else
-#include <cmath> // float_t
-#endif
 #include "miopen_cstdint.hpp"
 
 #ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
diff --git a/src/kernels/hip_f8_impl.hpp b/src/kernels/hip_f8_impl.hpp
index 8ff8255ceb..d8583ba3aa 100644
--- a/src/kernels/hip_f8_impl.hpp
+++ b/src/kernels/hip_f8_impl.hpp
@@ -25,6 +25,10 @@
  *******************************************************************************/
 // #include <miopen/bfloat16.hpp>
 // #include <half.hpp>
+
+#include "miopen_cstdint.hpp"
+#include "miopen_type_traits.hpp"
+
 namespace miopen_hip_f8_impl {
 
 #ifndef __HIP_PLATFORM_AMD__
diff --git a/src/kernels/stride_array.hpp b/src/kernels/stride_array.hpp
index 32cb1f85b6..95c86fc79e 100644
--- a/src/kernels/stride_array.hpp
+++ b/src/kernels/stride_array.hpp
@@ -25,12 +25,6 @@
  *******************************************************************************/
 #pragma once
 
-#ifdef __HIPCC_RTC__
-#ifndef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-#include <cstdint>
-#endif
-#endif // __HIPCC_RTC__
-
 /// \todo Uncomment when hip RTC accepts std::array -- amberhassaan
 // #include <hip/amd_detail/amd_hip_vector_types.h>
 // using StrideIndexType = int;

From 1992ab3558dfc11bfd2aa2b2dc167bae43ff6fad Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Thu, 21 Dec 2023 23:51:20 +0300
Subject: [PATCH 37/50] fix-hiprtc-60(10) [F8] Improved build time for device.
 Added todo wrt fixing numeric_limits::min().

---
 src/kernels/hip_float8.hpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp
index d6c67fb79f..e8fd5b8147 100644
--- a/src/kernels/hip_float8.hpp
+++ b/src/kernels/hip_float8.hpp
@@ -37,7 +37,7 @@
 // therefore, when this file is used from the host side, compilation takes much
 // longer. By guarding the __device__ directive we can control that such compilation
 // only happens for kernels which include this file.
-#define MIOPEN_HIP_HOST_DEVICE __host__ __device__
+#define MIOPEN_HIP_HOST_DEVICE __device__
 #else
 #define MIOPEN_HIP_HOST_DEVICE
 #endif
@@ -513,6 +513,7 @@ class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>
         return miopen_f8::F8_Max<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>();
     }
 
+    /// \todo This is wrong. min() should minimum normalized positive value.
     static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8> min()
     {
         return static_cast<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>(-1.0f) *
@@ -540,6 +541,8 @@ class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>
         return static_cast<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>(
             miopen_f8::F8_Max<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>());
     }
+
+    /// \todo This is wrong. min() should minimum normalized positive value.
     static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8> min()
     {
         return static_cast<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>(-1.0f) *

From d0490559b68cc1bde0d75ba20d437d90adfa4b07 Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Thu, 21 Dec 2023 23:53:35 +0300
Subject: [PATCH 38/50] fix-hiprtc-60(11) [HIPRTC][PCH] Improved
 miopen_cstdint.hpp. Prepare the fix for ROCm 6.1 RC with outdated HIP
 version.

---
 src/kernels/miopen_cstdint.hpp     |  6 +++---
 src/kernels/miopen_type_traits.hpp | 20 +++++++++++++++++++-
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/kernels/miopen_cstdint.hpp b/src/kernels/miopen_cstdint.hpp
index ab237c805f..ef1c6c5329 100644
--- a/src/kernels/miopen_cstdint.hpp
+++ b/src/kernels/miopen_cstdint.hpp
@@ -26,11 +26,11 @@
 #pragma once
 
 #ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// Definitions from <cstdint>, <cmath> conflict with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h.
 typedef signed char int8_t;
+typedef unsigned char uint8_t;
 typedef signed short int16_t;
-#if HIP_PACKAGE_VERSION_FLAT >= 6000023494ULL
+typedef unsigned short uint16_t;
+#if HIP_PACKAGE_VERSION_FLAT >= 6001000000ULL
 typedef signed int int32_t;
 typedef unsigned int uint32_t;
 typedef __hip_internal::uint64_t uint64_t;
diff --git a/src/kernels/miopen_type_traits.hpp b/src/kernels/miopen_type_traits.hpp
index 7dbef7c31a..2ef6aa9a16 100644
--- a/src/kernels/miopen_type_traits.hpp
+++ b/src/kernels/miopen_type_traits.hpp
@@ -81,7 +81,7 @@ struct remove_cv
     typedef typename remove_volatile<typename remove_const<T>::type>::type type;
 };
 
-#if HIP_PACKAGE_VERSION_FLAT >= 6000023494ULL
+#if HIP_PACKAGE_VERSION_FLAT >= 6001000000ULL
 template <class T, T v>
 struct integral_constant
 {
@@ -127,6 +127,24 @@ struct is_pointer : is_pointer_helper<typename remove_cv<T>::type>
 {
 };
 
+template <bool predicate, typename X, typename Y>
+struct conditional;
+
+template <typename X, typename Y>
+struct conditional<true, X, Y>
+{
+    using type = X;
+};
+
+template <typename X, typename Y>
+struct conditional<false, X, Y>
+{
+    using type = Y;
+};
+
+template <bool predicate, typename X, typename Y>
+using conditional_t = typename conditional<predicate, X, Y>::type;
+
 } // namespace std
 #else
 

From cae396dc7d73853048c376daa772e0ee73451ae6 Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Thu, 21 Dec 2023 23:54:43 +0300
Subject: [PATCH 39/50] fix-hiprtc-60(12) [test_handle_test] Expect ROCm 6.1 RC
 with outdated HIP version.

---
 test/handle_test.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/test/handle_test.cpp b/test/handle_test.cpp
index 4467e410f5..ad38dd12b5 100644
--- a/test/handle_test.cpp
+++ b/test/handle_test.cpp
@@ -29,8 +29,12 @@
 #define WORKAROUND_SWDEV_257056_PCH_MISSING_MACROS 1
 
 // https://gerrit-git.amd.com/c/compute/ec/clr/+/972441
+// "HIP_PACKAGE_VERSION_FLAT == 6001000000ULL" is for ROCm 6.1 RC where issue #2600 is not 
+// yet fixed in the compiler. In order to test such release candidates, we have to
+// override HIP version to 6.1.0.
 #define WORKAROUND_ISSUE_2600 \
-    (HIP_PACKAGE_VERSION_FLAT > 6000000000ULL && HIP_PACKAGE_VERSION_FLAT <= 6000023494ULL)
+    ((HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL && HIP_PACKAGE_VERSION_FLAT < 6001000000ULL) \
+    || HIP_PACKAGE_VERSION_FLAT == 6001000000ULL)
 
 #include <miopen/config.h>
 #include <miopen/handle.hpp>

From f6502c32fb6b68c3cd66acd80c3afd0ea816eed5 Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Fri, 22 Dec 2023 00:02:07 +0300
Subject: [PATCH 40/50] fix-hiprtc-60(13) Formatting

---
 test/handle_test.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/handle_test.cpp b/test/handle_test.cpp
index ad38dd12b5..be337d8c0b 100644
--- a/test/handle_test.cpp
+++ b/test/handle_test.cpp
@@ -29,12 +29,12 @@
 #define WORKAROUND_SWDEV_257056_PCH_MISSING_MACROS 1
 
 // https://gerrit-git.amd.com/c/compute/ec/clr/+/972441
-// "HIP_PACKAGE_VERSION_FLAT == 6001000000ULL" is for ROCm 6.1 RC where issue #2600 is not 
+// "HIP_PACKAGE_VERSION_FLAT == 6001000000ULL" is for ROCm 6.1 RC where issue #2600 is not
 // yet fixed in the compiler. In order to test such release candidates, we have to
 // override HIP version to 6.1.0.
-#define WORKAROUND_ISSUE_2600 \
-    ((HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL && HIP_PACKAGE_VERSION_FLAT < 6001000000ULL) \
-    || HIP_PACKAGE_VERSION_FLAT == 6001000000ULL)
+#define WORKAROUND_ISSUE_2600                                                                   \
+    ((HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL && HIP_PACKAGE_VERSION_FLAT < 6001000000ULL) || \
+     HIP_PACKAGE_VERSION_FLAT == 6001000000ULL)
 
 #include <miopen/config.h>
 #include <miopen/handle.hpp>

From f3e11f0055a2e0e5c51a575b5b76c7dcee30080a Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Fri, 22 Dec 2023 17:46:42 +0300
Subject: [PATCH 41/50] [tests] Limit applicability of
 ConvFwdBiasActivAPI/ConvFwdBiasResAddActivTest.ConvFusedAPI

---
 test/gtest/fused_conv_bias_res_add_activ.cpp | 81 +++++++++++++++-----
 1 file changed, 61 insertions(+), 20 deletions(-)

diff --git a/test/gtest/fused_conv_bias_res_add_activ.cpp b/test/gtest/fused_conv_bias_res_add_activ.cpp
index a3d066d82d..eb39bd0035 100644
--- a/test/gtest/fused_conv_bias_res_add_activ.cpp
+++ b/test/gtest/fused_conv_bias_res_add_activ.cpp
@@ -25,14 +25,44 @@
  *******************************************************************************/
 #include <gtest/gtest.h>
 #include <miopen/miopen.h>
+#include <miopen/env.hpp>
+#if MIOPEN_USE_COMPOSABLEKERNEL
+#include <miopen/solver/ck_utility_common.hpp>
+#endif
 
 #include "tensor_util.hpp"
 #include "get_handle.hpp"
 
 #include "conv3d_test_case.hpp"
 
+MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL)
+MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG)
+
+#if MIOPEN_USE_COMPOSABLEKERNEL
+#define WORAROUND_ISSUE_2533 1
+#endif
+
 namespace conv_bias_act_res_add_fwd {
 
+bool TestIsApplicable()
+{
+#if MIOPEN_USE_COMPOSABLEKERNEL
+    const auto float_arg = miopen::GetStringEnv(ENV(MIOPEN_TEST_FLOAT_ARG));
+    return
+#if WORAROUND_ISSUE_2533
+        miopen::solver::ck_utility::is_ck_whitelist(get_handle().GetDeviceName()) //
+#else
+    /// \todo Check against specific ASCIs.
+#endif
+        && (float_arg == "--half" // So far only test for fp16 is implemented.
+            || float_arg.empty()) // Empty when gtest is run without parameters.
+        && !miopen::IsDisabled(
+               ENV(MIOPEN_TEST_ALL)); // Not disabled when gtest is run without parameters.
+#else
+    return false;
+#endif
+}
+
 std::vector<Conv3DTestCase> ConvTestConfigs()
 { //         g, n, c, d,  h,  w, k,  z, y, x, pad_x pad_y pad_z stri_x stri_y stri_z dia_x dia_y
   //         dia_z
@@ -57,6 +87,8 @@ struct ConvFwdBiasResAddFixture
 protected:
     void SetUp() override
     {
+        if(!TestIsApplicable())
+            return;
 
         std::tie(algo, conv_config, alpha1, alpha2, tensor_layout) = GetParam();
 
@@ -94,6 +126,8 @@ struct ConvFwdBiasResAddFixture
     }
     void TearDown() override
     {
+        if(!TestIsApplicable())
+            return;
 
         miopenDestroyActivationDescriptor(activ_desc);
 
@@ -163,26 +197,33 @@ using namespace conv_bias_act_res_add_fwd;
 
 TEST_P(ConvFwdBiasResAddActivTest, ConvFusedAPI)
 {
-    auto status = miopenConvolutionBiasActivationForward(&get_handle(),
-                                                         &alpha1,
-                                                         &input.desc,
-                                                         in_dev.get(),
-                                                         &weights.desc,
-                                                         wei_dev.get(),
-                                                         &conv_desc,
-                                                         algo,
-                                                         nullptr, // workspace
-                                                         0ull,    // workspace size
-                                                         &alpha2,
-                                                         &z.desc,
-                                                         z_dev.get(),
-                                                         &bias.desc,
-                                                         bias_dev.get(),
-                                                         activ_desc,
-                                                         &output.desc,
-                                                         out_dev.get());
-
-    EXPECT_EQ(status, miopenStatusSuccess);
+    if(TestIsApplicable())
+    {
+        auto status = miopenConvolutionBiasActivationForward(&get_handle(),
+                                                             &alpha1,
+                                                             &input.desc,
+                                                             in_dev.get(),
+                                                             &weights.desc,
+                                                             wei_dev.get(),
+                                                             &conv_desc,
+                                                             algo,
+                                                             nullptr, // workspace
+                                                             0ull,    // workspace size
+                                                             &alpha2,
+                                                             &z.desc,
+                                                             z_dev.get(),
+                                                             &bias.desc,
+                                                             bias_dev.get(),
+                                                             activ_desc,
+                                                             &output.desc,
+                                                             out_dev.get());
+
+        EXPECT_EQ(status, miopenStatusSuccess);
+    }
+    else
+    {
+        GTEST_SKIP();
+    }
 }
 
 INSTANTIATE_TEST_SUITE_P(ConvFwdBiasActivAPI,

From e59ae19ff3d850a898a92acdfedfe810fea3a09f Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Sat, 23 Dec 2023 01:41:28 +0300
Subject: [PATCH 42/50] fix-hiprtc-60(17) [HIPRTC][PCH] Add and use
 miopen_limits.hpp instead of <limits>

---
 src/CMakeLists.txt                            |  1 +
 src/kernels/MIOpenCheckNumerics.cpp           |  5 +-
 .../gpu_reference_kernel/fp8_naive_conv.cpp   |  6 +-
 .../gpu_reference_kernel/naive_conv.cpp       |  5 +-
 src/kernels/hip_float8.hpp                    | 21 ++++-
 src/kernels/miopen_limits.hpp                 | 94 +++++++++++++++++++
 .../static_kernel_reduction_operator.hpp      |  4 +-
 7 files changed, 118 insertions(+), 18 deletions(-)
 create mode 100644 src/kernels/miopen_limits.hpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d4ca81c639..bb5f8c71e9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -389,6 +389,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN
         kernels/hip_float8.hpp
         kernels/inst_wrappers.inc
         kernels/miopen_cstdint.hpp
+        kernels/miopen_limits.hpp
         kernels/miopen_type_traits.hpp
         kernels/miopen_utility.hpp
         kernels/neuron.inc
diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp
index ae67fcb941..036da97728 100644
--- a/src/kernels/MIOpenCheckNumerics.cpp
+++ b/src/kernels/MIOpenCheckNumerics.cpp
@@ -30,12 +30,11 @@
 
 #include <hip/hip_bfloat16.h>
 
-/// \todo miopen_limits.hpp
-#include <limits>
-
 #define MIOPEN_ENABLE_F8_DEVICE_CODE 1
 #include "hip_float8.hpp"
 
+#include "miopen_limits.hpp"
+
 struct Numerics
 {
     float sum;
diff --git a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
index a818311442..724ef3d5af 100644
--- a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
+++ b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp
@@ -32,11 +32,7 @@
 
 #include "miopen_cstdint.hpp"
 #include "miopen_type_traits.hpp"
-
-/// \todo miopen_limits.hpp
-#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
-#include <limits>
-#endif
+#include "miopen_limits.hpp"
 
 #define MIOPEN_ENABLE_F8_DEVICE_CODE 1
 #include "hip_float8.hpp"
diff --git a/src/kernels/gpu_reference_kernel/naive_conv.cpp b/src/kernels/gpu_reference_kernel/naive_conv.cpp
index 37eb824c8d..d9a6c133d3 100644
--- a/src/kernels/gpu_reference_kernel/naive_conv.cpp
+++ b/src/kernels/gpu_reference_kernel/naive_conv.cpp
@@ -29,10 +29,7 @@
 #endif
 
 #include "miopen_cstdint.hpp"
-
-#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
-#include <limits> // std::numeric_limits
-#endif
+#include "miopen_limits.hpp"
 
 #include "stride_array.hpp"
 
diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp
index e8fd5b8147..fb81848e5b 100644
--- a/src/kernels/hip_float8.hpp
+++ b/src/kernels/hip_float8.hpp
@@ -32,12 +32,13 @@
 #endif
 
 // FP8 header version 0.4, 2021/05/11
+// Updated by atamazov 2023/12/22
 #if defined __HIP_PLATFORM_AMD__ && MIOPEN_ENABLE_F8_DEVICE_CODE
 // MIOpen by default does not have device code in the regular compilation paths,
 // therefore, when this file is used from the host side, compilation takes much
 // longer. By guarding the __device__ directive we can control that such compilation
 // only happens for kernels which include this file.
-#define MIOPEN_HIP_HOST_DEVICE __device__
+#define MIOPEN_HIP_HOST_DEVICE __host__ __device__
 #else
 #define MIOPEN_HIP_HOST_DEVICE
 #endif
@@ -519,6 +520,8 @@ class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>
         return static_cast<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>(-1.0f) *
                miopen_f8::F8_Max<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>();
     }
+
+    static constexpr int digits = 4;
 };
 
 template <>
@@ -548,11 +551,16 @@ class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>
         return static_cast<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>(-1.0f) *
                miopen_f8::F8_Max<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>();
     }
+
+    static constexpr int digits = 3;
 };
 
 } // namespace miopen_f8
 
-#ifndef __HIPCC_RTC__
+#ifdef __HIPCC_RTC__
+// Assume that if hipRTC is used, then we get <cmath> for F8
+// from the precompiled header.
+#else
 namespace std {
 inline bool isfinite(miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8> x) // NOLINT
 {
@@ -574,6 +582,14 @@ inline bool isnan(miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8> x) // NOLINT
     return x.is_nan();
 }
 
+} // namespace std
+#endif
+
+namespace std {
+
+template <typename T>
+class numeric_limits;
+
 template <>
 class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>
     : public miopen_f8::numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8>>
@@ -587,7 +603,6 @@ class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>
 };
 
 } // namespace std
-#endif
 
 template <miopen_f8::hip_f8_type T>
 struct hip_f8x4
diff --git a/src/kernels/miopen_limits.hpp b/src/kernels/miopen_limits.hpp
new file mode 100644
index 0000000000..8f237d3c7f
--- /dev/null
+++ b/src/kernels/miopen_limits.hpp
@@ -0,0 +1,94 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#pragma once
+
+#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
+
+#include <hip/hip_bfloat16.h>
+
+#define MIOPEN_ENABLE_F8_DEVICE_CODE 1
+#include "hip_float8.hpp"
+
+namespace std {
+
+template <typename T>
+class numeric_limits;
+
+template <>
+class numeric_limits<float>
+{
+public:
+    static constexpr __device__ float max() noexcept { return 0x1.FFFFFEp+127f; }
+
+    static constexpr __device__ float min() noexcept { return 0x1p-126f; }
+};
+
+template <>
+class numeric_limits<_Float16>
+{
+public:
+    static constexpr __device__ _Float16 max() noexcept
+    {
+        return static_cast<_Float16>(0x1.FFCp+15f);
+    }
+
+    static constexpr __device__ _Float16 min() noexcept { return static_cast<_Float16>(0x1p-14f); }
+};
+
+template <>
+class numeric_limits<hip_bfloat16>
+{
+public:
+    static
+#if HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL
+        constexpr
+#endif
+        __device__ hip_bfloat16
+        max() noexcept
+    {
+        // data = 0x7F7F
+        return static_cast<hip_bfloat16>(0x1.FEp+127f);
+    }
+
+    static
+#if HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL
+        constexpr
+#endif
+        __device__ hip_bfloat16
+        min() noexcept
+    {
+        // data = 0x0080
+        return static_cast<hip_bfloat16>(0x1p-14f);
+    }
+};
+
+} // namespace std
+
+#else
+
+#include <limits>
+
+#endif
diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp
index e996e7a8b0..87e830954c 100644
--- a/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp
+++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp
@@ -26,9 +26,7 @@
 #ifndef CK_REDUCTION_OPERATOR_HPP
 #define CK_REDUCTION_OPERATOR_HPP
 
-#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
-#include <limits>
-#endif
+#include "miopen_limits.hpp"
 #include "static_kernel_reduction_common.hpp"
 
 namespace ck {

From d349c2d1e10c49ae4f3a9ba472aebb8e54ef44a0 Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Sat, 23 Dec 2023 01:42:55 +0300
Subject: [PATCH 43/50] fix-hiprtc-60(18) [driver][tests] Fix F8 related build
 errors in gen_subnorm()

---
 driver/random.hpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/driver/random.hpp b/driver/random.hpp
index 66bbfaab80..8731471c62 100644
--- a/driver/random.hpp
+++ b/driver/random.hpp
@@ -109,7 +109,9 @@ inline T gen_subnorm()
     if constexpr(!std::is_integral_v<T> && !std::is_same_v<T, double> &&
                  details::has_digits<T>::value)
     {
-        using BitType = std::conditional_t<sizeof(T) == 2, uint16_t, uint32_t>;
+        using BitType = std::conditional_t<sizeof(T) == 1,
+                                           uint8_t,
+                                           std::conditional_t<sizeof(T) == 2, uint16_t, uint32_t>>;
         static_assert(sizeof(T) == sizeof(BitType));
 
         // -1 because ::digits counts the first implicit digit

From d445a0b3ea3455554ec4a310cb68cc1232fdcd1d Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Sat, 23 Dec 2023 23:23:19 +0300
Subject: [PATCH 44/50] fix-hiprtc-60(19) [HIPRTC][PCH] For now, use only
 standard <limits> to avoid possibility of correctness or performance
 regressions

---
 src/kernels/miopen_limits.hpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/kernels/miopen_limits.hpp b/src/kernels/miopen_limits.hpp
index 8f237d3c7f..abfa653120 100644
--- a/src/kernels/miopen_limits.hpp
+++ b/src/kernels/miopen_limits.hpp
@@ -25,7 +25,11 @@
  *******************************************************************************/
 #pragma once
 
-#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
+/// For now, use only standard <limits> to avoid possibility of correctnes
+/// or performance regressions.
+/// \todo Test and enable local implementation.
+/// #ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
+#if 0
 
 #include <hip/hip_bfloat16.h>
 

From 0d03807da5cb759d4b83e3fa0d7ada38e73be35d Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Sat, 23 Dec 2023 23:31:25 +0300
Subject: [PATCH 45/50] fix-hiprtc-60(20) Tidy fixes

---
 src/kernels/hip_float8.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp
index fb81848e5b..7ef6256808 100644
--- a/src/kernels/hip_float8.hpp
+++ b/src/kernels/hip_float8.hpp
@@ -561,6 +561,7 @@ class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>
 // Assume that if hipRTC is used, then we get <cmath> for F8
 // from the precompiled header.
 #else
+// NOLINTBEGIN(cert-dcl58-cpp)
 namespace std {
 inline bool isfinite(miopen_f8::hip_f8<miopen_f8::hip_f8_type::fp8> x) // NOLINT
 {
@@ -583,8 +584,10 @@ inline bool isnan(miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8> x) // NOLINT
 }
 
 } // namespace std
+  // NOLINTEND(cert-dcl58-cpp)
 #endif
 
+// NOLINTBEGIN(cert-dcl58-cpp)
 namespace std {
 
 template <typename T>
@@ -603,6 +606,7 @@ class numeric_limits<miopen_f8::hip_f8<miopen_f8::hip_f8_type::bf8>>
 };
 
 } // namespace std
+// NOLINTEND(cert-dcl58-cpp)
 
 template <miopen_f8::hip_f8_type T>
 struct hip_f8x4

From af0eed9b6a258a4a2913f00825794eaa845a3a3b Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Sat, 23 Dec 2023 23:32:35 +0300
Subject: [PATCH 46/50] fix-hiprtc-60(21) [gemm] Fix runtime error with debug
 build - remove incorrect assertion

---
 src/gemm_v2.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp
index 0be46017db..ec3fdd488f 100644
--- a/src/gemm_v2.cpp
+++ b/src/gemm_v2.cpp
@@ -115,8 +115,6 @@ static inline rocblas_computetype rocBlasComputeType_ex3(const miopen::GemmDescr
 
 static inline rocblas_datatype rocBlasComputeType(const miopen::GemmDescriptor& desc)
 {
-    // Complex compute types are only supported in newer version of the API
-    assert(desc.dataType == desc.a_cast_type && desc.dataType == desc.b_cast_type);
     if(desc.dataType == miopenInt8)
         return rocblas_datatype::rocblas_datatype_i32_r;
     else

From 3badc2618a915c6a02aec7ea4517312145c5a657 Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Sat, 23 Dec 2023 23:33:37 +0300
Subject: [PATCH 47/50] fix-hiprtc-60(22) [NFC] Remove incorrect comment from
 miopen_type_traits.hpp

---
 src/kernels/miopen_type_traits.hpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/kernels/miopen_type_traits.hpp b/src/kernels/miopen_type_traits.hpp
index 2ef6aa9a16..1b2f8d4154 100644
--- a/src/kernels/miopen_type_traits.hpp
+++ b/src/kernels/miopen_type_traits.hpp
@@ -26,11 +26,6 @@
 #pragma once
 
 #ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
-/// We need <type_traits> for std::remove_reference and std::remove_cv.
-/// But <type_traits> also defines std::true_type, per Standard.
-/// However the latter definition conflicts with
-/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h,
-/// which defines std::true_type as well (which is wrong).
 
 namespace std {
 

From 0bcbe483f68589fc17027aed3df5c4927cbfc548 Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Sun, 24 Dec 2023 01:52:34 +0300
Subject: [PATCH 48/50] fix-hiprtc-60(23) [NFC][HIPRTC][quality] Dismiss
 WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE, use MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
 instead

---
 src/comgr.cpp                      | 1 -
 src/kernels/miopen_cstdint.hpp     | 2 +-
 src/kernels/miopen_type_traits.hpp | 2 +-
 src/kernels/miopen_utility.hpp     | 2 +-
 4 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/comgr.cpp b/src/comgr.cpp
index 5d4c71ec57..b5ed19fcbf 100644
--- a/src/comgr.cpp
+++ b/src/comgr.cpp
@@ -1292,7 +1292,6 @@ void BuildHip(const std::string& name,
         auto opts =
             miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit());
         compiler::lc::RemoveOptionsUnwanted(opts);
-        opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE=1"); // Workaround for SWDEV-308073
 #if HIP_PACKAGE_VERSION_MAJOR < 6
         opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround?
 #endif
diff --git a/src/kernels/miopen_cstdint.hpp b/src/kernels/miopen_cstdint.hpp
index ef1c6c5329..57d0d088b1 100644
--- a/src/kernels/miopen_cstdint.hpp
+++ b/src/kernels/miopen_cstdint.hpp
@@ -25,7 +25,7 @@
  *******************************************************************************/
 #pragma once
 
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
+#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
 typedef signed char int8_t;
 typedef unsigned char uint8_t;
 typedef signed short int16_t;
diff --git a/src/kernels/miopen_type_traits.hpp b/src/kernels/miopen_type_traits.hpp
index 1b2f8d4154..352010e3ac 100644
--- a/src/kernels/miopen_type_traits.hpp
+++ b/src/kernels/miopen_type_traits.hpp
@@ -25,7 +25,7 @@
  *******************************************************************************/
 #pragma once
 
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
+#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
 
 namespace std {
 
diff --git a/src/kernels/miopen_utility.hpp b/src/kernels/miopen_utility.hpp
index db74017a9a..584ffad278 100644
--- a/src/kernels/miopen_utility.hpp
+++ b/src/kernels/miopen_utility.hpp
@@ -25,7 +25,7 @@
  *******************************************************************************/
 #pragma once
 
-#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE
+#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
 
 #include "miopen_type_traits.hpp" // std::remove_reference
 

From b0df12412525b7462c7b74ccb68858131bdaa41b Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Sun, 24 Dec 2023 19:50:29 +0300
Subject: [PATCH 49/50] fix-hiprtc-60(24) [NFC][HIPRTC][quality] Add and use
 WORKAROUND_DONT_USE_CUSTOM_LIMITS

---
 src/comgr.cpp                 |  6 +++++-
 src/kernels/miopen_limits.hpp | 10 +++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/comgr.cpp b/src/comgr.cpp
index b5ed19fcbf..971a60391e 100644
--- a/src/comgr.cpp
+++ b/src/comgr.cpp
@@ -1301,7 +1301,11 @@ void BuildHip(const std::string& name,
             opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1");
 #endif
         opts.push_back("-DHIP_PACKAGE_VERSION_FLAT=" + std::to_string(HIP_PACKAGE_VERSION_FLAT));
-        opts.push_back("-DMIOPEN_DONT_USE_HIP_RUNTIME_HEADERS=1");
+        opts.push_back("-DMIOPEN_DONT_USE_HIP_RUNTIME_HEADERS");
+        /// For now, use only standard <limits> to avoid possibility of
+        /// correctnes or performance regressions.
+        /// \todo Test and enable "custom" local implementation.
+        opts.push_back("-DWORKAROUND_DONT_USE_CUSTOM_LIMITS=1");
 #if WORKAROUND_ISSUE_1431
         if((StartsWith(target.Name(), "gfx10") || StartsWith(target.Name(), "gfx11")) &&
            !miopen::comgr::IsWave64Enforced(opts))
diff --git a/src/kernels/miopen_limits.hpp b/src/kernels/miopen_limits.hpp
index abfa653120..2a8f5e6178 100644
--- a/src/kernels/miopen_limits.hpp
+++ b/src/kernels/miopen_limits.hpp
@@ -25,11 +25,11 @@
  *******************************************************************************/
 #pragma once
 
-/// For now, use only standard <limits> to avoid possibility of correctnes
-/// or performance regressions.
-/// \todo Test and enable local implementation.
-/// #ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
-#if 0
+#ifndef WORKAROUND_DO_NOT_USE_CUSTOM_LIMITS
+#define WORKAROUND_DO_NOT_USE_CUSTOM_LIMITS 0
+#endif
+
+#if defined(MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS) && !WORKAROUND_DONT_USE_CUSTOM_LIMITS
 
 #include <hip/hip_bfloat16.h>
 

From 07e095e8b1215a5a95213d55937d8714690a7e79 Mon Sep 17 00:00:00 2001
From: atamazov <artem.tamazov@gmail.com>
Date: Sun, 24 Dec 2023 21:43:39 +0300
Subject: [PATCH 50/50] fix-hiprtc-60(26) Code clarity. <limits> leftover.

---
 .../composable_kernel/include/utility/data_type.hpp           | 4 +---
 test/handle_test.cpp                                          | 4 ++--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/composable_kernel/composable_kernel/include/utility/data_type.hpp b/src/composable_kernel/composable_kernel/include/utility/data_type.hpp
index 470c08fc00..01ae13a405 100644
--- a/src/composable_kernel/composable_kernel/include/utility/data_type.hpp
+++ b/src/composable_kernel/composable_kernel/include/utility/data_type.hpp
@@ -4,9 +4,7 @@
 #include "statically_indexed_array.hpp"
 
 #include "miopen_cstdint.hpp"
-
-/// \todo miopen_limits.hpp
-#include <limits>
+#include "miopen_limits.hpp"
 
 namespace ck {
 
diff --git a/test/handle_test.cpp b/test/handle_test.cpp
index be337d8c0b..ba143d7ad1 100644
--- a/test/handle_test.cpp
+++ b/test/handle_test.cpp
@@ -32,8 +32,8 @@
 // "HIP_PACKAGE_VERSION_FLAT == 6001000000ULL" is for ROCm 6.1 RC where issue #2600 is not
 // yet fixed in the compiler. In order to test such release candidates, we have to
 // override HIP version to 6.1.0.
-#define WORKAROUND_ISSUE_2600                                                                   \
-    ((HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL && HIP_PACKAGE_VERSION_FLAT < 6001000000ULL) || \
+#define WORKAROUND_ISSUE_2600                                                                    \
+    ((HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL && HIP_PACKAGE_VERSION_FLAT <= 6000999999ULL) || \
      HIP_PACKAGE_VERSION_FLAT == 6001000000ULL)
 
 #include <miopen/config.h>