From 309309b11a839de67196c26450ff7ffcac98af26 Mon Sep 17 00:00:00 2001 From: atamazov Date: Sun, 5 Nov 2023 01:39:43 +0300 Subject: [PATCH 01/50] Update docker and miopen version --- CMakeLists.txt | 2 +- Dockerfile | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fc1d2594e0..a8fc802244 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,7 +104,7 @@ if(NOT WIN32 AND NOT APPLE) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s") endif() -rocm_setup_version(VERSION 3.00.0) +rocm_setup_version(VERSION 3.01.0) list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ) include(TargetFlags) diff --git a/Dockerfile b/Dockerfile index 4e4281958c..a97525c710 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:20.04 as miopen +FROM ubuntu:22.04 as miopen ARG DEBIAN_FRONTEND=noninteractive # Support multiarch @@ -18,17 +18,17 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg -RUN wget https://repo.radeon.com/amdgpu-install/5.7.1/ubuntu/focal/amdgpu-install_5.7.50701-1_all.deb --no-check-certificate +RUN wget https://repo.radeon.com/amdgpu-install/.6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ - ./amdgpu-install_5.7.50701-1_all.deb + ./amdgpu-install_6.0.60000-1_all.deb # Add rocm repository -RUN export ROCM_APT_VER=5.7.1;\ +RUN export ROCM_APT_VER=6.0;\ echo $ROCM_APT_VER &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list' -RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list" +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/.$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\ +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/.apt_$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list' +RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list" RUN amdgpu-install -y --usecase=rocm --no-dkms From 61e17213490a047c283d6e1e5c8af06d226e1680 Mon Sep 17 00:00:00 2001 From: atamazov Date: Sun, 5 Nov 2023 01:44:03 +0300 Subject: [PATCH 02/50] update FIN to develop --- fin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fin b/fin index ae2ff171f9..044f5e90c6 160000 --- a/fin +++ b/fin @@ -1 +1 @@ -Subproject commit ae2ff171f9803e2731092a1309ed71ffc18ec2c1 +Subproject commit 044f5e90c6ddb2184467e7029f39c2d7fba19d29 From 58c4d97cfcc75976ff2b867702cea6237288e57c Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Fri, 10 Nov 2023 17:56:32 -0800 Subject: [PATCH 03/50] update a few more requirements --- dev-requirements.txt | 2 +- requirements.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 6efae8c0de..788514f780 100755 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,3 +1,3 @@ ROCmSoftwarePlatform/rocm-recipes@d7b71f8ff71572833c8cf15b74279dd034e66f9d -f requirements.txt -danmar/cppcheck@2.9 +danmar/cppcheck@2.12.1 diff --git a/requirements.txt b/requirements.txt index d1864085e2..a42de6d4f5 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On boost@1.83 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations " -ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH=/opt/conda/envs/py_3.9 -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0 From 39572c04540e8f9d0f874637522534ce88290a9a Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Sat, 11 Nov 2023 21:54:37 -0800 Subject: [PATCH 04/50] overrisde existing installed files --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index a97525c710..3363a59264 100755 --- a/Dockerfile +++ b/Dockerfile @@ -98,9 +98,9 @@ RUN tar zxvf /tmp/ccache.tar.gz -C /tmp/ && mkdir /tmp/ccache-${CCACHE_COMMIT}/b RUN ccache -s ARG COMPILER_LAUNCHER="" RUN if [ "$USE_FIN" = "ON" ]; then \ - rbuild prepare -s fin -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ + sudo rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ else \ - rbuild prepare -s develop -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ + sudo rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ fi RUN ccache -s From 4a011d15d0f6cd54ffb37baf5cbc03b089968b54 Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Sun, 12 Nov 2023 00:13:15 -0800 Subject: [PATCH 05/50] purge CK before installing new --- Dockerfile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3363a59264..d7e313227e 100755 --- a/Dockerfile +++ b/Dockerfile @@ -96,11 +96,17 @@ RUN tar zxvf /tmp/ccache.tar.gz -C /tmp/ && mkdir /tmp/ccache-${CCACHE_COMMIT}/b cd /tmp/ccache-${CCACHE_COMMIT}/build && \ cmake -DZSTD_FROM_INTERNET=ON -DHIREDIS_FROM_INTERNET=ON .. && make -j install && rm -rf /tmp/* RUN ccache -s + +# purge existing composable kernel installed with ROCm +# hence cannot use autoremove since it will remove more components +RUN apt-get update && \ +DEBIAN_FRONTEND=noninteractive apt-get purge -y --allow-unauthenticated \ + composablekernel-dev ARG COMPILER_LAUNCHER="" RUN if [ "$USE_FIN" = "ON" ]; then \ - sudo rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ + rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ else \ - sudo rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ + rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ fi RUN ccache -s From 58f8d80b1a2ffd6ed9d98b9d48f44726ca5c8d9c Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Sun, 12 Nov 2023 20:51:43 -0800 Subject: [PATCH 06/50] fix hip tidy issues --- CMakeLists.txt | 12 ++++++++++++ src/rnn_api.cpp | 2 +- test/na_train.cpp | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a8fc802244..ff24ae4a0e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -765,6 +765,18 @@ enable_cppcheck( knownConditionTrueFalse shadowFunction moduloofone + ################################################################### + # TODO Code Quality WORKAROUND ROCm 6.0 && + # Ubuntu 22.04 && cppcheck 2.12.1 update + ################################################################### + duplInheritedMember + constParameterReference + constParameterPointer + constVariableReference + constVariablePointer + useStlAlgorithm + uselessOverride + unusedScopedObject FORCE SOURCES addkernels/ diff --git a/src/rnn_api.cpp b/src/rnn_api.cpp index 8dce8f364e..e04dc5c2ea 100644 --- a/src/rnn_api.cpp +++ b/src/rnn_api.cpp @@ -529,7 +529,7 @@ static void LogCmdRNN(const miopenTensorDescriptor_t* xDesc, const int seqLength, const RNNDir_t dir) { - if(miopen::IsLoggingCmd()) + if(miopen::IsLoggingCmd() && seqLength > 0) { std::string mode; miopenRNNMode_t rnnMode = miopen::deref(rnnDesc).rnnMode; diff --git a/test/na_train.cpp b/test/na_train.cpp index 24529058ed..80f3fc781f 100644 --- a/test/na_train.cpp +++ b/test/na_train.cpp @@ -802,7 +802,7 @@ struct na_fusion_driver : test_driver std::size_t input_n, input_c, input_h, input_w; std::tie(input_n, input_c, input_h, input_w) = miopen::tien<4>(input.desc.GetLengths()); - this->tolerance = 80 * float(input.desc.GetElementSize()); + this->tolerance = 80 * double(input.desc.GetElementSize()); ptr_activdesc = GetManagedActivDesc(); miopenSetActivationDescriptor(ptr_activdesc.get(), activ_mode, alpha, beta, gamma); auto&& handle = get_handle(); From 7eefad5d2899fedf6085577f52a8441252276e74 Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Mon, 13 Nov 2023 08:37:53 -0800 Subject: [PATCH 07/50] add a few missing ones --- CMakeLists.txt | 1 + src/convolution.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ff24ae4a0e..8356eb7da2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -770,6 +770,7 @@ enable_cppcheck( # Ubuntu 22.04 && cppcheck 2.12.1 update ################################################################### duplInheritedMember + constParameterCallback constParameterReference constParameterPointer constVariableReference diff --git a/src/convolution.cpp b/src/convolution.cpp index d0f9a64ffb..dbbe03eda3 100644 --- a/src/convolution.cpp +++ b/src/convolution.cpp @@ -289,7 +289,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& } } - std::size_t out_c; + std::size_t out_c = 0; std::vector out_lens(spatial_dim + 2); auto out_spatial = boost::adaptors::slice(out_lens, 2, 2 + spatial_dim); From ed1a7326ec49c886093359e17f820d89531406a4 Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Mon, 13 Nov 2023 08:38:54 -0800 Subject: [PATCH 08/50] adopt review opinion --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index a42de6d4f5..be5f50c5d8 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On boost@1.83 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations " +# ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH=/opt/conda/envs/py_3.9 -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0 From 8053a0cdee07b3a15f783bfb436f9f3631a5d8bf Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Mon, 13 Nov 2023 23:04:00 -0800 Subject: [PATCH 09/50] Update CMakeLists.txt Co-authored-by: JD --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8356eb7da2..468bcfbfeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,7 +104,7 @@ if(NOT WIN32 AND NOT APPLE) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s") endif() -rocm_setup_version(VERSION 3.01.0) +rocm_setup_version(VERSION 3.1.0) list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ) include(TargetFlags) From f958b469798aa814c1220feabb92455687bfa556 Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Tue, 14 Nov 2023 12:04:53 -0800 Subject: [PATCH 10/50] fix issues typo and hiprtc header --- docs/DebugAndLogging.md | 2 +- src/kernels/MIOpenCheckNumerics.cpp | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/DebugAndLogging.md b/docs/DebugAndLogging.md index 3ae5db123a..b1e497efcc 100644 --- a/docs/DebugAndLogging.md +++ b/docs/DebugAndLogging.md @@ -94,7 +94,7 @@ Direct Solutions: * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11` - `ConvOclDirectFwd11x11`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN` - `ConvOclDirectFwdGen`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD` - `ConvOclDirectFwd`. -* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd`. +* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd1x1`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2` - `ConvOclBwdWrW2` (where n = `{1,2,4,8,16}`), and `ConvOclBwdWrW2NonTunable`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53` - `ConvOclBwdWrW53`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1` - `ConvOclBwdWrW1x1` diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp index 827f4d1397..915f76a3d2 100644 --- a/src/kernels/MIOpenCheckNumerics.cpp +++ b/src/kernels/MIOpenCheckNumerics.cpp @@ -26,6 +26,7 @@ #ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS #include #include +#include // std::numeric_limits #endif #include @@ -70,8 +71,6 @@ using conditional_t = typename conditional::type; #endif #endif // __HIPCC_RTC__ -#include // std::numeric_limits - #define MIOPEN_ENABLE_F8_DEVICE_CODE 1 #include "hip_float8.hpp" From 699cf08415bba38fc91c47dfae4f12df3eca70a5 Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Thu, 16 Nov 2023 17:21:38 -0800 Subject: [PATCH 11/50] Keep base docker at Ubuntu 20.04 --- Dockerfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index d7e313227e..e3f9a89338 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:22.04 as miopen +FROM ubuntu:20.04 as miopen ARG DEBIAN_FRONTEND=noninteractive # Support multiarch @@ -18,7 +18,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg -RUN wget https://repo.radeon.com/amdgpu-install/.6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate +RUN wget https://repo.radeon.com/amdgpu-install/.6.0/ubuntu/focal/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ ./amdgpu-install_6.0.60000-1_all.deb @@ -26,9 +26,9 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ # Add rocm repository RUN export ROCM_APT_VER=6.0;\ echo $ROCM_APT_VER &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/.$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/.apt_$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list' -RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list" +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/.$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\ +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/.apt_$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list' +RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list" RUN amdgpu-install -y --usecase=rocm --no-dkms From 3e847dad39ff5f9e524ace83bf2187633109d87b Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Fri, 17 Nov 2023 13:29:22 -0800 Subject: [PATCH 12/50] Revert limitations on limits header --- src/kernels/MIOpenCheckNumerics.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp index 915f76a3d2..827f4d1397 100644 --- a/src/kernels/MIOpenCheckNumerics.cpp +++ b/src/kernels/MIOpenCheckNumerics.cpp @@ -26,7 +26,6 @@ #ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS #include #include -#include // std::numeric_limits #endif #include @@ -71,6 +70,8 @@ using conditional_t = typename conditional::type; #endif #endif // __HIPCC_RTC__ +#include // std::numeric_limits + #define MIOPEN_ENABLE_F8_DEVICE_CODE 1 #include "hip_float8.hpp" From 72eb1800aa5be8819a4270ad4f396e483c56165b Mon Sep 17 00:00:00 2001 From: Umang Yadav Date: Tue, 5 Dec 2023 16:38:00 +0000 Subject: [PATCH 13/50] add fixes --- .../gpu_reference_kernel/fp8_naive_conv.cpp | 25 ++++++++- src/kernels/hip_float8.hpp | 56 ++++++++++++++----- 2 files changed, 64 insertions(+), 17 deletions(-) diff --git a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp index 3b4eabecfb..d294c9e182 100644 --- a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp +++ b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp @@ -63,15 +63,36 @@ struct conditional template using conditional_t = typename conditional::type; + +template +struct integral_constant +{ + static constexpr T value = V; + using value_type = T; + using type = integral_constant; + constexpr operator value_type() const noexcept { return value; } + constexpr value_type operator()() const noexcept { return value; } + static constexpr type to() { return {}; } +}; + +template +using bool_constant = integral_constant; + +template +struct is_same : bool_constant<__is_same(T, U)> +{ +} + } // namespace std + #else #include // int8_t, int16_t #include // float_t #endif +#else // __HIPCC_RTC__ +#include #endif // __HIPCC_RTC__ -#include // std::numeric_limits - #define MIOPEN_ENABLE_F8_DEVICE_CODE 1 #include "hip_float8.hpp" diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp index a9b2a559a8..2947d6d713 100644 --- a/src/kernels/hip_float8.hpp +++ b/src/kernels/hip_float8.hpp @@ -83,6 +83,9 @@ inline MIOPEN_HIP_HOST_DEVICE bool get_hip_f8_bias_mode() #endif } +template +class numeric_limits; + template struct hip_f8 { @@ -262,8 +265,7 @@ struct hip_f8 inline MIOPEN_HIP_HOST_DEVICE bool operator==(const hip_f8& rhs) const { - if((rhs.is_zero() && this->is_zero()) || - (fabs(rhs - *this) < std::numeric_limits>::epsilon())) + if((rhs.is_zero() && this->is_zero()) || (this->data == rhs.data)) { return true; } @@ -487,19 +489,6 @@ MIOPEN_HIP_HOST_DEVICE T F8_Max() x.bits = 0x7F; return x.value; } -} // namespace miopen_f8 - -// define numeric limits for the new data type -namespace std { -inline bool isfinite(miopen_f8::hip_f8 x) // NOLINT -{ - return x.is_inf(); -} - -inline bool isfinite(miopen_f8::hip_f8 x) // NOLINT -{ - return x.is_inf(); -} template <> class numeric_limits> @@ -555,7 +544,44 @@ class numeric_limits> } }; +} // namespace miopen_f8 + +#ifndef __HIPCC_RTC__ +namespace std { +inline bool isfinite(miopen_f8::hip_f8 x) // NOLINT +{ + return x.is_inf(); +} + +inline bool isfinite(miopen_f8::hip_f8 x) // NOLINT +{ + return x.is_inf(); +} + +inline bool isnan(miopen_f8::hip_f8 x) // NOLINT +{ + return x.is_nan(); +} + +inline bool isnan(miopen_f8::hip_f8 x) // NOLINT +{ + return x.is_nan(); +} + +template <> +class numeric_limits> + : public miopen_f8::numeric_limits> +{ +}; + +template <> +class numeric_limits> + : public miopen_f8::numeric_limits> +{ +}; + } // namespace std +#endif template struct hip_f8x4 From 10e95817ac1334ea765b755f52f55c615794d60f Mon Sep 17 00:00:00 2001 From: Umang Yadav Date: Tue, 5 Dec 2023 18:16:52 +0000 Subject: [PATCH 14/50] changes that works for MIGraphX --- src/kernels/MIOpenCheckNumerics.cpp | 4 ++-- .../gpu_reference_kernel/fp8_naive_conv.cpp | 19 ------------------- src/kernels/hip_f8_impl.hpp | 8 ++++---- 3 files changed, 6 insertions(+), 25 deletions(-) diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp index 827f4d1397..8a363e4eb0 100644 --- a/src/kernels/MIOpenCheckNumerics.cpp +++ b/src/kernels/MIOpenCheckNumerics.cpp @@ -68,9 +68,9 @@ using conditional_t = typename conditional::type; #include // int8_t, int16_t #include // float_t #endif -#endif // __HIPCC_RTC__ - +#else // __HIPCC_RTC__ #include // std::numeric_limits +#endif // __HIPCC_RTC__ #define MIOPEN_ENABLE_F8_DEVICE_CODE 1 #include "hip_float8.hpp" diff --git a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp index d294c9e182..ed70941e6f 100644 --- a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp +++ b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp @@ -64,25 +64,6 @@ struct conditional template using conditional_t = typename conditional::type; -template -struct integral_constant -{ - static constexpr T value = V; - using value_type = T; - using type = integral_constant; - constexpr operator value_type() const noexcept { return value; } - constexpr value_type operator()() const noexcept { return value; } - static constexpr type to() { return {}; } -}; - -template -using bool_constant = integral_constant; - -template -struct is_same : bool_constant<__is_same(T, U)> -{ -} - } // namespace std #else diff --git a/src/kernels/hip_f8_impl.hpp b/src/kernels/hip_f8_impl.hpp index c7a62f9f72..45edeffc4b 100644 --- a/src/kernels/hip_f8_impl.hpp +++ b/src/kernels/hip_f8_impl.hpp @@ -87,8 +87,8 @@ MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8_no_range_reduce(T _x, template MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8(T _x, bool stoch, uint32_t rng) { - constexpr bool is_half = std::is_same::value; - constexpr bool is_float = std::is_same::value; + constexpr bool is_half = __is_same_as(T, half); + constexpr bool is_float = __is_same_as(T, float); static_assert(wm + we == 7, "wm+we==7"); static_assert(is_half || is_float, "Only half and float can be cast to f8"); @@ -272,8 +272,8 @@ MIOPEN_HIP_HOST_DEVICE uint8_t cast_to_f8(T _x, bool stoch, uint32_t rng) template MIOPEN_HIP_HOST_DEVICE T cast_from_f8(uint8_t x) { - constexpr bool is_half = std::is_same::value; - constexpr bool is_float = std::is_same::value; + constexpr bool is_half = __is_same_as(T, half); + constexpr bool is_float = __is_same_as(T, float); static_assert(is_half || is_float, "only half and float are supported"); constexpr int weo = is_half ? 5 : 8; From f498789403bea40ce45cc74d34719bdf99b18e64 Mon Sep 17 00:00:00 2001 From: Umang Yadav Date: Tue, 5 Dec 2023 18:37:22 +0000 Subject: [PATCH 15/50] rever changes for checknumerics --- src/kernels/MIOpenCheckNumerics.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp index 8a363e4eb0..827f4d1397 100644 --- a/src/kernels/MIOpenCheckNumerics.cpp +++ b/src/kernels/MIOpenCheckNumerics.cpp @@ -68,10 +68,10 @@ using conditional_t = typename conditional::type; #include // int8_t, int16_t #include // float_t #endif -#else // __HIPCC_RTC__ -#include // std::numeric_limits #endif // __HIPCC_RTC__ +#include // std::numeric_limits + #define MIOPEN_ENABLE_F8_DEVICE_CODE 1 #include "hip_float8.hpp" From 69c0d99f3249086a505ddf434027efec738dfa93 Mon Sep 17 00:00:00 2001 From: Umang Yadav Date: Tue, 5 Dec 2023 20:12:12 +0000 Subject: [PATCH 16/50] Formatting --- src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp index ed70941e6f..f24a2d8813 100644 --- a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp +++ b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp @@ -70,7 +70,7 @@ using conditional_t = typename conditional::type; #include // int8_t, int16_t #include // float_t #endif -#else // __HIPCC_RTC__ +#else // __HIPCC_RTC__ #include #endif // __HIPCC_RTC__ From cf02a64bc0daecd6f95e1c193fd91654cb8dc48e Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Tue, 5 Dec 2023 15:41:00 -0800 Subject: [PATCH 17/50] update docker file --- Dockerfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9ad2df53f1..c0dbb7b111 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:20.04 as miopen +FROM ubuntu:22.04 as miopen ARG DEBIAN_FRONTEND=noninteractive # Support multiarch @@ -18,7 +18,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg -RUN wget https://repo.radeon.com/amdgpu-install/.6.0/ubuntu/focal/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate +RUN wget https://repo.radeon.com/amdgpu-install/.6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ ./amdgpu-install_6.0.60000-1_all.deb @@ -26,9 +26,9 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ # Add rocm repository RUN export ROCM_APT_VER=6.0;\ echo $ROCM_APT_VER &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/.$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/.apt_$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list' -RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list" +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/.$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\ +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/.apt_$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list' +RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list" RUN amdgpu-install -y --usecase=rocm --no-dkms From ebeef1db7c3e8abbc7eefc897ecdedcc0d27ccfe Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Tue, 5 Dec 2023 16:08:47 -0800 Subject: [PATCH 18/50] avoid ldd conflicts --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 75cb1abe21..e636fdd4ab 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On boost@1.83 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations " # ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build -ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH=/opt/conda/envs/py_3.9 -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda +ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0 ROCmSoftwarePlatform/eigen@3.4.0 From ee34b45dc09368eda9e6d319d85e57cbe5db80c1 Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Tue, 5 Dec 2023 22:09:37 -0800 Subject: [PATCH 19/50] update CK commit hash --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e636fdd4ab..4237ba78ad 100755 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,4 @@ nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0 ROCmSoftwarePlatform/eigen@3.4.0 ROCmSoftwarePlatform/frugally-deep@9683d557eb672ee2304f80f6682c51242d748a50 -ROCmSoftwarePlatform/composable_kernel@df467969684505876ef3a95fef94b77645836494 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON +ROCmSoftwarePlatform/composable_kernel@5e17095fea33e8de1a0b549a3ad705699318b8be -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON From c6b435284bd6d88012e6d0aa3a4241f003321118 Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Fri, 15 Dec 2023 14:37:59 -0800 Subject: [PATCH 20/50] update dockerfile --- Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index c0dbb7b111..0d47e02dfd 100755 --- a/Dockerfile +++ b/Dockerfile @@ -18,16 +18,16 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg -RUN wget https://repo.radeon.com/amdgpu-install/.6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate +RUN wget https://repo.radeon.com/amdgpu-install/6.0.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ ./amdgpu-install_6.0.60000-1_all.deb # Add rocm repository -RUN export ROCM_APT_VER=6.0;\ +RUN export ROCM_APT_VER=6.0.0;\ echo $ROCM_APT_VER &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/.$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/.apt_$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list' +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\ +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list' RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list" RUN amdgpu-install -y --usecase=rocm --no-dkms From 9990a5dcb706af720ca715bba37534c245da098d Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Sat, 16 Dec 2023 11:31:18 -0800 Subject: [PATCH 21/50] update dockerfile --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0d47e02dfd..fde9eafd61 100755 --- a/Dockerfile +++ b/Dockerfile @@ -18,13 +18,13 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg -RUN wget https://repo.radeon.com/amdgpu-install/6.0.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate +RUN wget https://repo.radeon.com/amdgpu-install/6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ ./amdgpu-install_6.0.60000-1_all.deb # Add rocm repository -RUN export ROCM_APT_VER=6.0.0;\ +RUN export ROCM_APT_VER=6.0;\ echo $ROCM_APT_VER &&\ sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\ sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list' From 0133956352f8f2b041ed68d018bdbe46e8ba6940 Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Sat, 16 Dec 2023 19:34:13 -0800 Subject: [PATCH 22/50] workaround build issues of CK --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 06983fb0d1..f58fec2b49 100755 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,4 @@ nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0 ROCmSoftwarePlatform/eigen@3.4.0 ROCmSoftwarePlatform/frugally-deep@9683d557eb672ee2304f80f6682c51242d748a50 -ROCmSoftwarePlatform/composable_kernel@0dacd895d5ba9c9eeb99588ec7f7df1da82f7fa9 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON +ROCmSoftwarePlatform/composable_kernel@0dacd895d5ba9c9eeb99588ec7f7df1da82f7fa9 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON -DCMAKE_CXX_FLAGS=" -Wno-unused-parameter " From dc55bba6330d6173703b4e732cfaf1d199620aac Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Sat, 16 Dec 2023 20:50:10 -0800 Subject: [PATCH 23/50] fix the real issue in compiling rocMLIR --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index f58fec2b49..506eb6803d 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On boost@1.83 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations " # ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build -ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda +ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda -DCMAKE_CXX_FLAGS=" -Wno-unused-parameter " nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0 ROCmSoftwarePlatform/eigen@3.4.0 ROCmSoftwarePlatform/frugally-deep@9683d557eb672ee2304f80f6682c51242d748a50 -ROCmSoftwarePlatform/composable_kernel@0dacd895d5ba9c9eeb99588ec7f7df1da82f7fa9 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON -DCMAKE_CXX_FLAGS=" -Wno-unused-parameter " +ROCmSoftwarePlatform/composable_kernel@0dacd895d5ba9c9eeb99588ec7f7df1da82f7fa9 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON From 0d48eb3bf9cf52fc402fa63387f51ea477e998bc Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Sat, 16 Dec 2023 23:59:40 -0800 Subject: [PATCH 24/50] bump CK commit hash --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 506eb6803d..2c323504f8 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On boost@1.83 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations " # ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build -ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda -DCMAKE_CXX_FLAGS=" -Wno-unused-parameter " +ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0 ROCmSoftwarePlatform/eigen@3.4.0 ROCmSoftwarePlatform/frugally-deep@9683d557eb672ee2304f80f6682c51242d748a50 -ROCmSoftwarePlatform/composable_kernel@0dacd895d5ba9c9eeb99588ec7f7df1da82f7fa9 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON +ROCmSoftwarePlatform/composable_kernel@55a89c746eb6cf7973c47fb9b2635e0f73bd2fc2 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON From 19b1a954295250250969bffcc9a76f562f835f02 Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Sun, 17 Dec 2023 12:41:55 -0800 Subject: [PATCH 25/50] WA for Issue 2600 and turn on smoke tests by default --- Jenkinsfile | 6 +++--- test/handle_test.cpp | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 7e07d3deba..b1bbb916dd 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -436,15 +436,15 @@ pipeline { description: "") booleanParam( name: "BUILD_SMOKE_FP32", - defaultValue: env.BRANCH_NAME == env.NIGHTLY_BRANCH ? true : false, + defaultValue: true, // env.BRANCH_NAME == env.NIGHTLY_BRANCH ? true : false, description: "") booleanParam( name: "BUILD_SMOKE_AUX1", - defaultValue: env.BRANCH_NAME == env.NIGHTLY_BRANCH ? true : false, + defaultValue: true, // env.BRANCH_NAME == env.NIGHTLY_BRANCH ? true : false, description: "") booleanParam( name: "BUILD_SMOKE_FP16_BF16_INT8", - defaultValue: env.BRANCH_NAME == env.NIGHTLY_BRANCH ? true : false, + defaultValue: true, // env.BRANCH_NAME == env.NIGHTLY_BRANCH ? true : false, description: "") booleanParam( name: "BUILD_FULL_TESTS", diff --git a/test/handle_test.cpp b/test/handle_test.cpp index 2548a7ad4b..d2620c579a 100644 --- a/test/handle_test.cpp +++ b/test/handle_test.cpp @@ -28,6 +28,10 @@ /// \todo Create dedicated ticket and rename macro. #define WORKAROUND_SWDEV_257056_PCH_MISSING_MACROS 1 +// https://gerrit-git.amd.com/c/compute/ec/clr/+/972441 +#define WORKAROUND_ISSUE_2600 \ + (HIP_PACKAGE_VERSION_FLAT > 5007023384ULL && HIP_PACKAGE_VERSION_FLAT <= 6000023494ULL) + #include #include #include @@ -231,7 +235,7 @@ std::string WriteNop(kernel_type_t kern_type) void test_warnings(kernel_type_t kern_type) { auto&& h = get_handle(); -#if MIOPEN_BUILD_DEV +#if MIOPEN_BUILD_DEV && !WORKAROUND_ISSUE_2600 if(kern_type == miopenOpenCLKernelType) { EXPECT(throws([&] { From 592d5cba905611c6f3bf17369a52d165f7811f57 Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Mon, 18 Dec 2023 22:06:23 -0800 Subject: [PATCH 26/50] ROCm 6.0 replaces all __HIP_PLATFORM_HCC__ with __HIP_PLATFORM_AMD__ --- src/CMakeLists.txt | 2 +- src/comgr.cpp | 2 +- src/composable_kernel/.clang-tidy | 2 +- src/composable_kernel/cmake/ClangTidy.cmake | 2 +- .../external/rocm/include/bfloat16_dev.hpp | 10 +++--- src/kernels/bfloat16_dev.hpp | 10 +++--- src/kernels/float_types.h | 36 +++++++++---------- src/kernels/hip_f8_impl.hpp | 2 +- src/kernels/hip_float8.hpp | 2 +- 9 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1d5548db7e..df06e9785a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -770,7 +770,7 @@ elseif(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP") endif() if(ENABLE_HIP_WORKAROUNDS) # Workaround hip not setting its usage requirements correctly - target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_HCC__=1 ) + target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_AMD__=1 ) endif() # This is helpful for the tests target_link_libraries( MIOpen INTERFACE $ ) diff --git a/src/comgr.cpp b/src/comgr.cpp index 4040881e09..2c1b82560c 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -1293,7 +1293,7 @@ void BuildHip(const std::string& name, miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit()); compiler::lc::RemoveOptionsUnwanted(opts); opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073 - opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? + opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? #if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name())) diff --git a/src/composable_kernel/.clang-tidy b/src/composable_kernel/.clang-tidy index 5c2b781687..8d0880abcf 100644 --- a/src/composable_kernel/.clang-tidy +++ b/src/composable_kernel/.clang-tidy @@ -1,3 +1,3 @@ CheckOptions: - key: bugprone-reserved-identifier.AllowedIdentifiers - value: '__HIP_PLATFORM_HCC__;__HIP_ROCclr__' + value: '__HIP_PLATFORM_AMD__;__HIP_ROCclr__' diff --git a/src/composable_kernel/cmake/ClangTidy.cmake b/src/composable_kernel/cmake/ClangTidy.cmake index 8de726de09..04ec12c326 100644 --- a/src/composable_kernel/cmake/ClangTidy.cmake +++ b/src/composable_kernel/cmake/ClangTidy.cmake @@ -149,7 +149,7 @@ function(clang_tidy_check TARGET) add_custom_target(${tidy_target} # for some targets clang-tidy not able to get information from .clang-tidy DEPENDS ${SOURCE} - COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_HCC__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml" + COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_AMD__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMENT "clang-tidy: Running clang-tidy on target ${SOURCE}..." ) diff --git a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp index f5fa35adfb..26d8645d61 100644 --- a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp +++ b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp @@ -30,7 +30,7 @@ extern "C" { #endif -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define EXECUTION_SPECIFIER __device__ #else #define EXECUTION_SPECIFIER @@ -43,7 +43,7 @@ typedef union // Composable kernels are written in HIP language. The language doesnt support // ushort2.hi or ushort2.low. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ ushort ushortvec[2]; #endif // MIOPEN_BACKEND_HIP float f32; @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val) { cvt_bf16_fp32_t target_val; -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.ushortx2 = make_ushort2(0, src_val); #else target_val.ushortx2 = (ushort2)(0, src_val); @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F, // incrementing it causes it to become an exponent of 0xFF and a mantissa // of 0x00, which is Inf, the next higher value to the unrounded value. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1)); #else target_val.u32 += @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) #endif // MIOPEN_USE_RNE_BFLOAT16 } -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ return target_val.ushortvec[1]; #else return target_val.ushortx2.hi; diff --git a/src/kernels/bfloat16_dev.hpp b/src/kernels/bfloat16_dev.hpp index f5f24baa81..4b85a95975 100644 --- a/src/kernels/bfloat16_dev.hpp +++ b/src/kernels/bfloat16_dev.hpp @@ -30,7 +30,7 @@ extern "C" { #endif -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define EXECUTION_SPECIFIER __device__ #else #define EXECUTION_SPECIFIER @@ -43,7 +43,7 @@ typedef union cvt_bf16_fp32 // Composable kernels are written in HIP language. The language doesnt support // ushort2.hi or ushort2.low. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ ushort ushortvec[2]; #endif // MIOPEN_BACKEND_HIP float f32; @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val) { cvt_bf16_fp32_t target_val; -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.ushortx2 = make_ushort2(0, src_val); #else target_val.ushortx2 = (ushort2)(0, src_val); @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F, // incrementing it causes it to become an exponent of 0xFF and a mantissa // of 0x00, which is Inf, the next higher value to the unrounded value. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1)); #else target_val.u32 += @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) #endif // MIOPEN_USE_RNE_BFLOAT16 } -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ return target_val.ushortvec[1]; #else return target_val.ushortx2.hi; diff --git a/src/kernels/float_types.h b/src/kernels/float_types.h index 5406ba85ec..beded11d8d 100644 --- a/src/kernels/float_types.h +++ b/src/kernels/float_types.h @@ -34,7 +34,7 @@ #define FOUR 4 #define EIGHT 8 #if MIOPEN_USE_FP8 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT hip_f8 #define FLOAT_ACCUM float // HIP implements the correct operators for conversion @@ -58,7 +58,7 @@ #endif // MIOPEN_USE_FP8 #if MIOPEN_USE_BFP8 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT hip_f8 #define FLOAT_ACCUM float #else @@ -79,7 +79,7 @@ // #endif #endif // MIOPEN_USE_BFP8 -#ifndef __HIP_PLATFORM_HCC__ +#ifndef __HIP_PLATFORM_AMD__ #define _FLOAT2 PPCAT(_FLOAT, TWO) #define _FLOAT4 PPCAT(_FLOAT, FOUR) #define _FLOAT8 PPCAT(_FLOAT, EIGHT) @@ -99,19 +99,19 @@ #endif #if MIOPEN_USE_DOUBLE_ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT_ACCUM double #else #pragma OPENCL EXTENSION cl_khr_fp64 : enable #define _FLOAT_ACCUM double -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #define MAX_VAL_ACCUM DBL_MAX #else // MIOPEN_USE_DOUBLE_ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT_ACCUM float #else #define _FLOAT_ACCUM float -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #ifndef FLT_MAX #define MAX_VAL_ACCUM 3.402823466e+38F #else @@ -120,12 +120,12 @@ #endif // MIOPEN_USE_DOUBLE_ACCUM #if MIOPEN_USE_FP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT _Float16 -#else // __HIP_PLATFORM_HCC__ +#else // __HIP_PLATFORM_AMD__ #pragma OPENCL EXTENSION cl_khr_fp16 : enable #define _FLOAT half -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #define SIZEOF_FLOAT 2 // Max value for the main datatype #ifndef HALF_MAX @@ -136,11 +136,11 @@ #endif // MIOPEN_USE_FP16 #if MIOPEN_USE_FP32 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT float #else #define _FLOAT float -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #define SIZEOF_FLOAT 4 // Max value for the main datatype #ifndef FLT_MAX @@ -151,7 +151,7 @@ #endif // MIOPEN_USE_FP32 #if MIOPEN_USE_BFP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT ushort #else #define _FLOAT ushort @@ -162,7 +162,7 @@ #endif // MIOPEN_USE_BFP16 #if MIOPEN_USE_FP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_FLOAT2ACCUM(x) (static_cast(x)) #define CVT_ACCUM2FLOAT(x) (static_cast(x)) #define CVT_INTEGRAL2ACCUM(x) (static_cast(x)) @@ -188,7 +188,7 @@ /// refactoring should be considered as nontrivial and requires /// a separate PR. Let's keep this historical stuff for now. /// --atamazov 30.08.2023 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_FLOAT2ACCUM(x) (static_cast(x)) #define CVT_ACCUM2FLOAT(x) (static_cast(x)) #define CVT_INTEGRAL2ACCUM(x) (static_cast(x)) @@ -202,7 +202,7 @@ #endif // MIOPEN_USE_FP32 #if MIOPEN_USE_BFP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_FLOAT2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED #define CVT_ACCUM2FLOAT(x) MIOPEN_ERROR_NOT_IMLEMENTED #define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED @@ -232,7 +232,7 @@ #endif #if MIOPEN_USE_NATIVE_DATATYPE_ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #undef FLOAT_ACCUM #define FLOAT_ACCUM MIOPEN_ERROR_NOT_IMLEMENTED #else @@ -250,7 +250,7 @@ #define CVT_FP32_2ACCUM(x) (CVT_FP32_2FLOAT(x)) #undef CVT_INTEGRAL2ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED #else #if MIOPEN_USE_BFP16 == 1 diff --git a/src/kernels/hip_f8_impl.hpp b/src/kernels/hip_f8_impl.hpp index c8d49cd474..8ff8255ceb 100644 --- a/src/kernels/hip_f8_impl.hpp +++ b/src/kernels/hip_f8_impl.hpp @@ -27,7 +27,7 @@ // #include namespace miopen_hip_f8_impl { -#ifndef __HIP_PLATFORM_HCC__ +#ifndef __HIP_PLATFORM_AMD__ using hip_bfloat16 = bfloat16; using half = half_float::half; #endif diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp index 2947d6d713..d7ec875d17 100644 --- a/src/kernels/hip_float8.hpp +++ b/src/kernels/hip_float8.hpp @@ -29,7 +29,7 @@ #endif // FP8 header version 0.4, 2021/05/11 -#if defined __HIP_PLATFORM_HCC__ && MIOPEN_ENABLE_F8_DEVICE_CODE +#if defined __HIP_PLATFORM_AMD__ && MIOPEN_ENABLE_F8_DEVICE_CODE // MIOpen by default does not have device code in the regular compilation paths, // therefore, when this file is used from the host side, compilation takes much // longer. By guarding the __device__ directive we can control that such compilation From f92cd4c415bcf5e3901b8bb1ba5ff515745ad700 Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Tue, 19 Dec 2023 09:48:45 -0800 Subject: [PATCH 27/50] Update src/comgr.cpp Co-authored-by: Artem Tamazov --- src/comgr.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/comgr.cpp b/src/comgr.cpp index 2c1b82560c..6d71daba26 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -1293,7 +1293,9 @@ void BuildHip(const std::string& name, miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit()); compiler::lc::RemoveOptionsUnwanted(opts); opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073 - opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? +#if HIP_PACKAGE_VERSION_FLAT < 6000023494ULL + opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? +#endif opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? #if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name())) From b81d3a5bb97f84bd946e8e6dd1dddd22b9231dd7 Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Tue, 19 Dec 2023 10:15:11 -0800 Subject: [PATCH 28/50] fix clang format issue --- src/comgr.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/comgr.cpp b/src/comgr.cpp index 6d71daba26..08c61efbc7 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -1294,9 +1294,9 @@ void BuildHip(const std::string& name, compiler::lc::RemoveOptionsUnwanted(opts); opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073 #if HIP_PACKAGE_VERSION_FLAT < 6000023494ULL - opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? + opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? #endif - opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? + opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? #if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name())) opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1"); From ee4020ed68e3986d42b6252e6c6f4cfc0b4ee907 Mon Sep 17 00:00:00 2001 From: atamazov Date: Wed, 20 Dec 2023 01:56:57 +0300 Subject: [PATCH 29/50] fix-hiprtc-60(01) Add and use miopen_cstdint.hpp --- src/CMakeLists.txt | 23 ++++++----- src/comgr.cpp | 6 +-- .../batched_transpose.cpp | 2 + .../gpu_reference_kernel/naive_conv.cpp | 17 +++----- src/kernels/hip_float8.hpp | 3 ++ src/kernels/miopen_cstdint.hpp | 40 +++++++++++++++++++ 6 files changed, 65 insertions(+), 26 deletions(-) create mode 100644 src/kernels/miopen_cstdint.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bed3f18121..df7fffd0ac 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -379,23 +379,24 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f3x2_stride1.inc kernels/Conv_Winograd_v30_3_1_gfx11_fp32_f3x2_stride2.inc kernels/Conv_Winograd_v30_3_1_metadata.inc - kernels/xform_bidirect_winograd_code.inc - kernels/rocm_version.inc - kernels/inst_wrappers.inc + kernels/bfloat16_dev.hpp kernels/conv_common.inc - kernels/utilities.inc - kernels/xform_data_filter.inc - kernels/xform_kd_cov2.inc - kernels/xform_metadata.inc - kernels/neuron.inc kernels/conv_sizes.inc - kernels/gpr_alloc.inc - kernels/bfloat16_dev.hpp kernels/float_types.h - kernels/workaround_issue_1431.hpp + kernels/gpr_alloc.inc kernels/hip_f8_impl.hpp kernels/hip_float8.hpp + kernels/inst_wrappers.inc + kernels/miopen_cstdint.hpp + kernels/neuron.inc + kernels/rocm_version.inc kernels/stride_array.hpp + kernels/utilities.inc + kernels/workaround_issue_1431.hpp + kernels/xform_bidirect_winograd_code.inc + kernels/xform_data_filter.inc + kernels/xform_kd_cov2.inc + kernels/xform_metadata.inc ) set(MIOPEN_KERNELS diff --git a/src/comgr.cpp b/src/comgr.cpp index 4040881e09..3fd658514f 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -1292,9 +1292,9 @@ void BuildHip(const std::string& name, auto opts = miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit()); compiler::lc::RemoveOptionsUnwanted(opts); - opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073 - opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? - opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? + opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE=1"); // Workaround for SWDEV-308073 + opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? + opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? #if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name())) opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1"); diff --git a/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp b/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp index d1217c0f69..587b0b4191 100644 --- a/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp +++ b/src/kernels/gpu_batched_transpose_kernel/batched_transpose.cpp @@ -28,6 +28,8 @@ #include #endif +#include "miopen_cstdint.hpp" + #ifndef BATCHED_TRANSPOSE_OCCUPANCY #define BATCHED_TRANSPOSE_OCCUPANCY 4 #endif diff --git a/src/kernels/gpu_reference_kernel/naive_conv.cpp b/src/kernels/gpu_reference_kernel/naive_conv.cpp index 125eff94f3..ef6f9afc4b 100644 --- a/src/kernels/gpu_reference_kernel/naive_conv.cpp +++ b/src/kernels/gpu_reference_kernel/naive_conv.cpp @@ -28,23 +28,16 @@ #include #endif -#ifdef __HIPCC_RTC__ #ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// Definitions from , conflict with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h. - -typedef signed char int8_t; -typedef signed short int16_t; typedef float float_t; -#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS -#include // std::numeric_limits +#else +#include // float_t #endif +#include "miopen_cstdint.hpp" -#else -#include // int8_t, int16_t -#include // float_t +#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS +#include // std::numeric_limits #endif -#endif // __HIPCC_RTC__ #include "stride_array.hpp" diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp index 2947d6d713..7814decae6 100644 --- a/src/kernels/hip_float8.hpp +++ b/src/kernels/hip_float8.hpp @@ -24,6 +24,9 @@ * *******************************************************************************/ #pragma once + +#include "miopen_cstdint.hpp" + #ifndef MIOPEN_ENABLE_F8_DEVICE_CODE #define MIOPEN_ENABLE_F8_DEVICE_CODE 0 #endif diff --git a/src/kernels/miopen_cstdint.hpp b/src/kernels/miopen_cstdint.hpp new file mode 100644 index 0000000000..adcc9edf6d --- /dev/null +++ b/src/kernels/miopen_cstdint.hpp @@ -0,0 +1,40 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE +/// Definitions from , conflict with +/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h. +typedef signed char int8_t; +typedef signed short int16_t; +#if HIP_PACKAGE_VERSION_FLAT >= 6000023494ULL +typedef signed int int32_t; +typedef unsigned int uint32_t; +#endif + +#else +#include // int8_t, int16_t +#endif From f669dca8c2a945a8c2eeeb50a5d65a7434f86378 Mon Sep 17 00:00:00 2001 From: atamazov Date: Wed, 20 Dec 2023 01:58:04 +0300 Subject: [PATCH 30/50] fix-hiprtc-60(02) [tests] Improve logging of testing parameters during CMake phase --- test/CMakeLists.txt | 5 +++++ test/gtest/CMakeLists.txt | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9940bfe1ff..5f4c1e3521 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -176,6 +176,10 @@ message(STATUS "MIOPEN_TEST_GPU_XNACK_ENABLED ${MIOPEN_TEST_GPU_XNACK_ENABLED}") message(STATUS "MIOPEN_TEST_GPU_DETECTION_FAILED ${MIOPEN_TEST_GPU_DETECTION_FAILED}") message(STATUS "MIOPEN_TEST_WITH_MIOPENDRIVER ${MIOPEN_TEST_WITH_MIOPENDRIVER}") message(STATUS "MIOPEN_TEST_MLIR ${MIOPEN_TEST_MLIR}") +message(STATUS "MIOPEN_TEST_CONV ${MIOPEN_TEST_CONV}") +message(STATUS "MIOPEN_TEST_DEEPBENCH ${MIOPEN_TEST_DEEPBENCH}") +message(STATUS "MIOPEN_TEST_DRIVER_ITER_MODE ${MIOPEN_TEST_DRIVER_ITER_MODE}") +message(STATUS "MIOPEN_TEST_COMPOSABLEKERNEL ${MIOPEN_TEST_COMPOSABLEKERNEL}") if(MIOPEN_TEST_DRIVER_ITER_MODE) add_definitions(-DMIOPEN_TEST_DRIVER_MODE=2) @@ -234,6 +238,7 @@ message(STATUS "MIOPEN_TEST_FLOAT ${MIOPEN_TEST_FLOAT}") message(STATUS "MIOPEN_TEST_HALF ${MIOPEN_TEST_HALF}") message(STATUS "MIOPEN_TEST_BFLOAT16 ${MIOPEN_TEST_BFLOAT16}") message(STATUS "MIOPEN_TEST_INT8 ${MIOPEN_TEST_INT8}") +message(STATUS "MIOPEN_TEST_ALL ${MIOPEN_TEST_ALL}") set_var_to_condition(WORKAROUND_ISSUE_1187_DEFAULT MIOPEN_TEST_GFX90A AND MIOPEN_TEST_FLOAT) option( WORKAROUND_ISSUE_1187 "" ${WORKAROUND_ISSUE_1187_DEFAULT}) diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt index 2e80fb5eac..634841da3b 100644 --- a/test/gtest/CMakeLists.txt +++ b/test/gtest/CMakeLists.txt @@ -50,3 +50,8 @@ foreach(TEST ${TESTS}) get_filename_component(BASE_NAME ${TEST} NAME_WE) add_gtest(${BASE_NAME}) endforeach() + +message(STATUS "gtest env: MIOPEN_USER_DB_PATH=${CMAKE_CURRENT_BINARY_DIR}") +message(STATUS "gtest env: MIOPEN_TEST_FLOAT_ARG=${MIOPEN_TEST_FLOAT_ARG}") +message(STATUS "gtest env: MIOPEN_TEST_ALL=${MIOPEN_TEST_ALL}") +message(STATUS "gtest env: MIOPEN_TEST_COMPOSABLEKERNEL=${MIOPEN_TEST_COMPOSABLEKERNEL}") From 5756538e495b28ed955af50aca027e760e3524ba Mon Sep 17 00:00:00 2001 From: atamazov Date: Wed, 20 Dec 2023 02:20:05 +0300 Subject: [PATCH 31/50] fix-hiprtc-60(04) [test_handle_test] Expect 5.7.x --- test/handle_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/handle_test.cpp b/test/handle_test.cpp index ade06c5447..4467e410f5 100644 --- a/test/handle_test.cpp +++ b/test/handle_test.cpp @@ -30,7 +30,7 @@ // https://gerrit-git.amd.com/c/compute/ec/clr/+/972441 #define WORKAROUND_ISSUE_2600 \ - (HIP_PACKAGE_VERSION_FLAT > 5007023384ULL && HIP_PACKAGE_VERSION_FLAT <= 6000023494ULL) + (HIP_PACKAGE_VERSION_FLAT > 6000000000ULL && HIP_PACKAGE_VERSION_FLAT <= 6000023494ULL) #include #include From c81a2373c56ef2ec78521efabba92ee918f26d9c Mon Sep 17 00:00:00 2001 From: atamazov Date: Wed, 20 Dec 2023 21:43:30 +0300 Subject: [PATCH 32/50] fix-hiprtc-60(05) Add and use miopen_type_traits.hpp. Use miopen_cstdint.hpp in some kernels. --- src/CMakeLists.txt | 1 + .../include/utility/config.hpp | 1 + .../include/utility/enable_if.hpp | 2 + .../include/utility/magic_division.hpp | 1 + .../include/utility/type.hpp | 79 +--------- .../general_tensor_reorder_kernel_util.hpp | 19 +-- src/kernels/miopen_cstdint.hpp | 1 + src/kernels/miopen_type_traits.hpp | 135 ++++++++++++++++++ .../utility/static_kernel_ck_utils_type.hpp | 61 +------- 9 files changed, 147 insertions(+), 153 deletions(-) create mode 100644 src/kernels/miopen_type_traits.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 69c55d75db..85c915df72 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -388,6 +388,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN kernels/hip_float8.hpp kernels/inst_wrappers.inc kernels/miopen_cstdint.hpp + kernels/miopen_type_traits.hpp kernels/neuron.inc kernels/rocm_version.inc kernels/stride_array.hpp diff --git a/src/composable_kernel/composable_kernel/include/utility/config.hpp b/src/composable_kernel/composable_kernel/include/utility/config.hpp index 92307214f4..7869a075f2 100644 --- a/src/composable_kernel/composable_kernel/include/utility/config.hpp +++ b/src/composable_kernel/composable_kernel/include/utility/config.hpp @@ -6,6 +6,7 @@ #include "hip/hip_fp16.h" #endif #include "bfloat16_dev.hpp" +#include "miopen_cstdint.hpp" // "Constant" address space for kernel parameter #define CONSTANT __attribute__((address_space(4))) diff --git a/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp b/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp index 501e1bfc1c..30494214f8 100644 --- a/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp +++ b/src/composable_kernel/composable_kernel/include/utility/enable_if.hpp @@ -1,6 +1,8 @@ #ifndef CK_ENABLE_IF_HPP #define CK_ENABLE_IF_HPP +#include "miopen_type_traits.hpp" + namespace ck { template diff --git a/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp b/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp index b7489016e9..174c697501 100644 --- a/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp +++ b/src/composable_kernel/composable_kernel/include/utility/magic_division.hpp @@ -6,6 +6,7 @@ #include "number.hpp" #include "type.hpp" #include "tuple.hpp" +#include "miopen_cstdint.hpp" namespace ck { diff --git a/src/composable_kernel/composable_kernel/include/utility/type.hpp b/src/composable_kernel/composable_kernel/include/utility/type.hpp index 4e5d4e5134..17769d52c0 100644 --- a/src/composable_kernel/composable_kernel/include/utility/type.hpp +++ b/src/composable_kernel/composable_kernel/include/utility/type.hpp @@ -3,84 +3,7 @@ #include "integral_constant.hpp" #include "enable_if.hpp" - -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// We need for std::remove_reference and std::remove_cv. -/// But also defines std::true_type, per Standard. -/// However the latter definition conflicts with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h, -/// which defines std::true_type as well (which is wrong). - -namespace std { - -template -struct remove_reference -{ - typedef T type; -}; -template -struct remove_reference -{ - typedef T type; -}; -template -struct remove_reference -{ - typedef T type; -}; - -template -using remove_reference_t = typename remove_reference::type; - -template -struct remove_const -{ - typedef T type; -}; -template -struct remove_const -{ - typedef T type; -}; - -template -struct remove_volatile -{ - typedef T type; -}; -template -struct remove_volatile -{ - typedef T type; -}; - -template -struct remove_cv -{ - typedef typename remove_volatile::type>::type type; -}; - -template -struct is_pointer_helper : std::false_type -{ -}; - -template -struct is_pointer_helper : std::true_type -{ -}; - -template -struct is_pointer : is_pointer_helper::type> -{ -}; - -} // namespace std -#else -#include // std::remove_reference, std::remove_cv, is_pointer -#endif -#endif // __HIPCC_RTC__ +#include "miopen_type_traits.hpp" namespace ck { diff --git a/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp b/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp index c88fa3cbc6..a6e917c855 100644 --- a/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp +++ b/src/kernels/gpu_general_tensor_reorder_kernel/general_tensor_reorder_kernel_util.hpp @@ -24,34 +24,23 @@ * *******************************************************************************/ #ifndef GENERAL_TENSOR_REORDER_UTIL_HPP -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// Definitions from , conflict with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h. - -typedef signed char int8_t; -typedef signed short int16_t; -typedef unsigned int uint32_t; - -#else -#include // int8_t, int16_t -#endif -#endif // __HIPCC_RTC__ +#define GENERAL_TENSOR_REORDER_UTIL_HPP #ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS #include #include #endif +#include "miopen_cstdint.hpp" + #ifndef TENSOR_REORDER_OCCUPANCY #define TENSOR_REORDER_OCCUPANCY 4 #endif -#define GENERAL_TENSOR_REORDER_UTIL_HPP template struct order { - static constexpr std::size_t m_size = sizeof...(Is); + static constexpr size_t m_size = sizeof...(Is); // the last dummy element is to prevent compiler complain about empty array, when mSize = 0 static constexpr int m_data[m_size + 1] = {Is..., 0}; diff --git a/src/kernels/miopen_cstdint.hpp b/src/kernels/miopen_cstdint.hpp index adcc9edf6d..ab237c805f 100644 --- a/src/kernels/miopen_cstdint.hpp +++ b/src/kernels/miopen_cstdint.hpp @@ -33,6 +33,7 @@ typedef signed short int16_t; #if HIP_PACKAGE_VERSION_FLAT >= 6000023494ULL typedef signed int int32_t; typedef unsigned int uint32_t; +typedef __hip_internal::uint64_t uint64_t; #endif #else diff --git a/src/kernels/miopen_type_traits.hpp b/src/kernels/miopen_type_traits.hpp new file mode 100644 index 0000000000..7dbef7c31a --- /dev/null +++ b/src/kernels/miopen_type_traits.hpp @@ -0,0 +1,135 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE +/// We need for std::remove_reference and std::remove_cv. +/// But also defines std::true_type, per Standard. +/// However the latter definition conflicts with +/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h, +/// which defines std::true_type as well (which is wrong). + +namespace std { + +template +struct remove_reference +{ + typedef T type; +}; +template +struct remove_reference +{ + typedef T type; +}; +template +struct remove_reference +{ + typedef T type; +}; + +template +using remove_reference_t = typename remove_reference::type; + +template +struct remove_const +{ + typedef T type; +}; +template +struct remove_const +{ + typedef T type; +}; + +template +struct remove_volatile +{ + typedef T type; +}; +template +struct remove_volatile +{ + typedef T type; +}; + +template +struct remove_cv +{ + typedef typename remove_volatile::type>::type type; +}; + +#if HIP_PACKAGE_VERSION_FLAT >= 6000023494ULL +template +struct integral_constant +{ + static constexpr T value = v; + using value_type = T; + using type = integral_constant; + constexpr operator value_type() const noexcept { return value; } + constexpr value_type operator()() const noexcept { return value; } +}; + +using true_type = integral_constant; +using false_type = integral_constant; + +template +struct is_same : false_type +{ +}; + +template +struct is_same : true_type +{ +}; + +template +using enable_if = __hip_internal::enable_if; + +template +using enable_if_t = typename __hip_internal::enable_if::type; +#endif + +template +struct is_pointer_helper : false_type +{ +}; + +template +struct is_pointer_helper : true_type +{ +}; + +template +struct is_pointer : is_pointer_helper::type> +{ +}; + +} // namespace std +#else + +#include // std::remove_reference, std::remove_cv, is_pointer + +#endif diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp index 19cf75624b..88036f8f4d 100644 --- a/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp +++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_ck_utils_type.hpp @@ -2,66 +2,7 @@ #define CK_UTILS_TYPE_HPP #include "static_kernel_integral_constant.hpp" - -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// We need for std::remove_reference and std::remove_cv. -/// But also defines std::true_type, per Standard. -/// However the latter definition conflicts with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h, -/// which defines std::true_type as well (which is wrong). - -namespace std { - -template -struct remove_reference -{ - typedef T type; -}; -template -struct remove_reference -{ - typedef T type; -}; -template -struct remove_reference -{ - typedef T type; -}; - -template -struct remove_const -{ - typedef T type; -}; -template -struct remove_const -{ - typedef T type; -}; - -template -struct remove_volatile -{ - typedef T type; -}; -template -struct remove_volatile -{ - typedef T type; -}; - -template -struct remove_cv -{ - typedef typename remove_volatile::type>::type type; -}; - -} // namespace std -#else -#include // std::remove_reference, std::remove_cv -#endif -#endif // __HIPCC_RTC__ +#include "miopen_type_traits.hpp" namespace ck { From 50740d065a6ebb649a2bee84b89ef26930bc180d Mon Sep 17 00:00:00 2001 From: atamazov Date: Thu, 21 Dec 2023 23:36:58 +0300 Subject: [PATCH 33/50] fix-hiprtc-60(06) Add and use miopen_utility.hpp instead of --- src/CMakeLists.txt | 1 + .../include/utility/array.hpp | 29 +---------- src/kernels/miopen_utility.hpp | 52 +++++++++++++++++++ .../include/utility/static_kernel_tuple.hpp | 29 +---------- 4 files changed, 55 insertions(+), 56 deletions(-) create mode 100644 src/kernels/miopen_utility.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 85c915df72..ef8504f8b7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -389,6 +389,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN kernels/inst_wrappers.inc kernels/miopen_cstdint.hpp kernels/miopen_type_traits.hpp + kernels/miopen_utility.hpp kernels/neuron.inc kernels/rocm_version.inc kernels/stride_array.hpp diff --git a/src/composable_kernel/composable_kernel/include/utility/array.hpp b/src/composable_kernel/composable_kernel/include/utility/array.hpp index 23623560f7..63a912015c 100644 --- a/src/composable_kernel/composable_kernel/include/utility/array.hpp +++ b/src/composable_kernel/composable_kernel/include/utility/array.hpp @@ -4,34 +4,7 @@ #include "functional2.hpp" #include "sequence.hpp" -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// We need for std::forward. In some cases, it includes -/// (this is against the Standard, but it doesn't matter in this case). -/// But also defines std::true_type, per Standard. -/// However the latter definition conflicts with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h, -/// which defines std::true_type as well (which is wrong). - -namespace std { - -template -constexpr T&& forward(typename remove_reference::type& t_) noexcept -{ - return static_cast(t_); -} - -template -constexpr T&& forward(typename remove_reference::type&& t_) noexcept -{ - return static_cast(t_); -} - -} // namespace std -#else -#include // std::forward -#endif -#endif // __HIPCC_RTC__ +#include "miopen_utility.hpp" // std::forward namespace ck { diff --git a/src/kernels/miopen_utility.hpp b/src/kernels/miopen_utility.hpp new file mode 100644 index 0000000000..db74017a9a --- /dev/null +++ b/src/kernels/miopen_utility.hpp @@ -0,0 +1,52 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE + +#include "miopen_type_traits.hpp" // std::remove_reference + +namespace std { + +template +constexpr T&& forward(typename remove_reference::type& t_) noexcept +{ + return static_cast(t_); +} + +template +constexpr T&& forward(typename remove_reference::type&& t_) noexcept +{ + return static_cast(t_); +} + +} // namespace std + +#else + +#include + +#endif diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp index 1118550815..343b9d388d 100644 --- a/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp +++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_tuple.hpp @@ -5,34 +5,7 @@ #include "static_kernel_ck_utils_type.hpp" #include "static_kernel_sequence.hpp" -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// We need for std::forward. In some cases, it includes -/// (this is against the Standard, but it doesn't matter in this case). -/// But also defines std::true_type, per Standard. -/// However the latter definition conflicts with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h, -/// which defines std::true_type as well (which is wrong). - -namespace std { - -template -constexpr T&& forward(typename remove_reference::type& t_) noexcept -{ - return static_cast(t_); -} - -template -constexpr T&& forward(typename remove_reference::type&& t_) noexcept -{ - return static_cast(t_); -} - -} // namespace std -#else -#include // std::forward -#endif -#endif // __HIPCC_RTC__ +#include "miopen_utility.hpp" // std::forward namespace ck { From ae88a45a82aa47950de9e524a8b6d3a873624b92 Mon Sep 17 00:00:00 2001 From: atamazov Date: Thu, 21 Dec 2023 23:40:07 +0300 Subject: [PATCH 34/50] fix-hiprtc-60(07) [CMake] Log HIPRTC version --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 12c5d3f8e1..de712c5f55 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -381,7 +381,7 @@ if(MIOPEN_USE_HIPRTC) message(FATAL_ERROR "HIPRTC can be used only together with COMGR") endif() find_package(hiprtc REQUIRED) - message(STATUS "Build with HIPRTC") + message(STATUS "Build with HIPRTC ${hiprtc_VERSION}") endif() option(Boost_USE_STATIC_LIBS "Use boost static libraries" ON) From 4a3b7ce253843bf973a6dac117a4748a51fbfa3c Mon Sep 17 00:00:00 2001 From: atamazov Date: Thu, 21 Dec 2023 23:42:55 +0300 Subject: [PATCH 35/50] fix-hiprtc-60(08) [HIPRTC] Do not define __HIP_PLATFORM_HCC__ starting from HIP 6.0.0. --- src/comgr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/comgr.cpp b/src/comgr.cpp index 57ad815a11..5d4c71ec57 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -1293,7 +1293,7 @@ void BuildHip(const std::string& name, miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit()); compiler::lc::RemoveOptionsUnwanted(opts); opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE=1"); // Workaround for SWDEV-308073 -#if HIP_PACKAGE_VERSION_FLAT < 6000023494ULL +#if HIP_PACKAGE_VERSION_MAJOR < 6 opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? #endif opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? From d0045b23702540553b946c0d06dc578074212f0f Mon Sep 17 00:00:00 2001 From: atamazov Date: Thu, 21 Dec 2023 23:49:25 +0300 Subject: [PATCH 36/50] fix-hiprtc-60(09) [HIPRTC][PCH] More uses of miopen_type_traits.hpp and miopen_cstdint.hpp. Got rid of . Added todo about --- .../include/utility/data_type.hpp | 21 ++------- src/kernels/MIOpenCheckNumerics.cpp | 43 +---------------- .../gpu_reference_kernel/fp8_naive_conv.cpp | 46 ++----------------- .../gpu_reference_kernel/naive_conv.cpp | 5 -- src/kernels/hip_f8_impl.hpp | 4 ++ src/kernels/stride_array.hpp | 6 --- 6 files changed, 16 insertions(+), 109 deletions(-) diff --git a/src/composable_kernel/composable_kernel/include/utility/data_type.hpp b/src/composable_kernel/composable_kernel/include/utility/data_type.hpp index 4d21f91e6a..470c08fc00 100644 --- a/src/composable_kernel/composable_kernel/include/utility/data_type.hpp +++ b/src/composable_kernel/composable_kernel/include/utility/data_type.hpp @@ -3,21 +3,10 @@ #include "statically_indexed_array.hpp" -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// Definitions from , conflict with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h. - -typedef signed char int8_t; -typedef signed short int16_t; -typedef float float_t; -#include // std::numeric_limits - -#else -#include // int8_t, int16_t -#include // float_t -#endif -#endif // __HIPCC_RTC__ +#include "miopen_cstdint.hpp" + +/// \todo miopen_limits.hpp +#include namespace ck { @@ -978,7 +967,7 @@ struct inner_product_with_conversion return acc; } - __device__ T operator()(float_t a, float_t b) const { return convert(a) * convert(b); } + __device__ T operator()(float a, float b) const { return convert(a) * convert(b); } __device__ T operator()(int8x4_t a, int8x4_t b) const { diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp index 827f4d1397..ae67fcb941 100644 --- a/src/kernels/MIOpenCheckNumerics.cpp +++ b/src/kernels/MIOpenCheckNumerics.cpp @@ -30,47 +30,8 @@ #include -// Copied over from naive_conv.cpp -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// Definitions from , conflict with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h. - -typedef unsigned char uint8_t; -typedef signed char int8_t; -typedef signed short int16_t; -typedef unsigned short uint16_t; -typedef float float_t; - -// std::conditional requires type_traits which has a few other things -// which result in collision with amd_hip_vector_types.h - -namespace std { -template -struct conditional; - -template -struct conditional -{ - using type = X; -}; - -template -struct conditional -{ - using type = Y; -}; - -template -using conditional_t = typename conditional::type; -} // namespace std -#else -#include // int8_t, int16_t -#include // float_t -#endif -#endif // __HIPCC_RTC__ - -#include // std::numeric_limits +/// \todo miopen_limits.hpp +#include #define MIOPEN_ENABLE_F8_DEVICE_CODE 1 #include "hip_float8.hpp" diff --git a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp index f24a2d8813..a818311442 100644 --- a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp +++ b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp @@ -30,49 +30,13 @@ #include #endif -// Copied over from naive_conv.cpp -#ifdef __HIPCC_RTC__ -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// Definitions from , conflict with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h. +#include "miopen_cstdint.hpp" +#include "miopen_type_traits.hpp" -typedef unsigned char uint8_t; -typedef signed char int8_t; -typedef signed short int16_t; -typedef unsigned short uint16_t; -typedef float float_t; - -// std::conditional requires type_traits which has a few other things -// which result in collition with amd_hip_vector_types.h - -namespace std { -template -struct conditional; - -template -struct conditional -{ - using type = X; -}; - -template -struct conditional -{ - using type = Y; -}; - -template -using conditional_t = typename conditional::type; - -} // namespace std - -#else -#include // int8_t, int16_t -#include // float_t -#endif -#else // __HIPCC_RTC__ +/// \todo miopen_limits.hpp +#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS #include -#endif // __HIPCC_RTC__ +#endif #define MIOPEN_ENABLE_F8_DEVICE_CODE 1 #include "hip_float8.hpp" diff --git a/src/kernels/gpu_reference_kernel/naive_conv.cpp b/src/kernels/gpu_reference_kernel/naive_conv.cpp index ef6f9afc4b..37eb824c8d 100644 --- a/src/kernels/gpu_reference_kernel/naive_conv.cpp +++ b/src/kernels/gpu_reference_kernel/naive_conv.cpp @@ -28,11 +28,6 @@ #include #endif -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -typedef float float_t; -#else -#include // float_t -#endif #include "miopen_cstdint.hpp" #ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS diff --git a/src/kernels/hip_f8_impl.hpp b/src/kernels/hip_f8_impl.hpp index 8ff8255ceb..d8583ba3aa 100644 --- a/src/kernels/hip_f8_impl.hpp +++ b/src/kernels/hip_f8_impl.hpp @@ -25,6 +25,10 @@ *******************************************************************************/ // #include // #include + +#include "miopen_cstdint.hpp" +#include "miopen_type_traits.hpp" + namespace miopen_hip_f8_impl { #ifndef __HIP_PLATFORM_AMD__ diff --git a/src/kernels/stride_array.hpp b/src/kernels/stride_array.hpp index 32cb1f85b6..95c86fc79e 100644 --- a/src/kernels/stride_array.hpp +++ b/src/kernels/stride_array.hpp @@ -25,12 +25,6 @@ *******************************************************************************/ #pragma once -#ifdef __HIPCC_RTC__ -#ifndef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -#include -#endif -#endif // __HIPCC_RTC__ - /// \todo Uncomment when hip RTC accepts std::array -- amberhassaan // #include // using StrideIndexType = int; From 1992ab3558dfc11bfd2aa2b2dc167bae43ff6fad Mon Sep 17 00:00:00 2001 From: atamazov Date: Thu, 21 Dec 2023 23:51:20 +0300 Subject: [PATCH 37/50] fix-hiprtc-60(10) [F8] Improved build time for device. Added todo wrt fixing numeric_limits::min(). --- src/kernels/hip_float8.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp index d6c67fb79f..e8fd5b8147 100644 --- a/src/kernels/hip_float8.hpp +++ b/src/kernels/hip_float8.hpp @@ -37,7 +37,7 @@ // therefore, when this file is used from the host side, compilation takes much // longer. By guarding the __device__ directive we can control that such compilation // only happens for kernels which include this file. -#define MIOPEN_HIP_HOST_DEVICE __host__ __device__ +#define MIOPEN_HIP_HOST_DEVICE __device__ #else #define MIOPEN_HIP_HOST_DEVICE #endif @@ -513,6 +513,7 @@ class numeric_limits> return miopen_f8::F8_Max>(); } + /// \todo This is wrong. min() should minimum normalized positive value. static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 min() { return static_cast>(-1.0f) * @@ -540,6 +541,8 @@ class numeric_limits> return static_cast>( miopen_f8::F8_Max>()); } + + /// \todo This is wrong. min() should minimum normalized positive value. static MIOPEN_HIP_HOST_DEVICE miopen_f8::hip_f8 min() { return static_cast>(-1.0f) * From d0490559b68cc1bde0d75ba20d437d90adfa4b07 Mon Sep 17 00:00:00 2001 From: atamazov Date: Thu, 21 Dec 2023 23:53:35 +0300 Subject: [PATCH 38/50] fix-hiprtc-60(11) [HIPRTC][PCH] Improved miopen_cstdint.hpp. Prepare the fix for ROCm 6.1 RC with outdated HIP version. --- src/kernels/miopen_cstdint.hpp | 6 +++--- src/kernels/miopen_type_traits.hpp | 20 +++++++++++++++++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/kernels/miopen_cstdint.hpp b/src/kernels/miopen_cstdint.hpp index ab237c805f..ef1c6c5329 100644 --- a/src/kernels/miopen_cstdint.hpp +++ b/src/kernels/miopen_cstdint.hpp @@ -26,11 +26,11 @@ #pragma once #ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// Definitions from , conflict with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h. typedef signed char int8_t; +typedef unsigned char uint8_t; typedef signed short int16_t; -#if HIP_PACKAGE_VERSION_FLAT >= 6000023494ULL +typedef unsigned short uint16_t; +#if HIP_PACKAGE_VERSION_FLAT >= 6001000000ULL typedef signed int int32_t; typedef unsigned int uint32_t; typedef __hip_internal::uint64_t uint64_t; diff --git a/src/kernels/miopen_type_traits.hpp b/src/kernels/miopen_type_traits.hpp index 7dbef7c31a..2ef6aa9a16 100644 --- a/src/kernels/miopen_type_traits.hpp +++ b/src/kernels/miopen_type_traits.hpp @@ -81,7 +81,7 @@ struct remove_cv typedef typename remove_volatile::type>::type type; }; -#if HIP_PACKAGE_VERSION_FLAT >= 6000023494ULL +#if HIP_PACKAGE_VERSION_FLAT >= 6001000000ULL template struct integral_constant { @@ -127,6 +127,24 @@ struct is_pointer : is_pointer_helper::type> { }; +template +struct conditional; + +template +struct conditional +{ + using type = X; +}; + +template +struct conditional +{ + using type = Y; +}; + +template +using conditional_t = typename conditional::type; + } // namespace std #else From cae396dc7d73853048c376daa772e0ee73451ae6 Mon Sep 17 00:00:00 2001 From: atamazov Date: Thu, 21 Dec 2023 23:54:43 +0300 Subject: [PATCH 39/50] fix-hiprtc-60(12) [test_handle_test] Expect ROCm 6.1 RC with outdated HIP version. --- test/handle_test.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/handle_test.cpp b/test/handle_test.cpp index 4467e410f5..ad38dd12b5 100644 --- a/test/handle_test.cpp +++ b/test/handle_test.cpp @@ -29,8 +29,12 @@ #define WORKAROUND_SWDEV_257056_PCH_MISSING_MACROS 1 // https://gerrit-git.amd.com/c/compute/ec/clr/+/972441 +// "HIP_PACKAGE_VERSION_FLAT == 6001000000ULL" is for ROCm 6.1 RC where issue #2600 is not +// yet fixed in the compiler. In order to test such release candidates, we have to +// override HIP version to 6.1.0. #define WORKAROUND_ISSUE_2600 \ - (HIP_PACKAGE_VERSION_FLAT > 6000000000ULL && HIP_PACKAGE_VERSION_FLAT <= 6000023494ULL) + ((HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL && HIP_PACKAGE_VERSION_FLAT < 6001000000ULL) \ + || HIP_PACKAGE_VERSION_FLAT == 6001000000ULL) #include #include From f6502c32fb6b68c3cd66acd80c3afd0ea816eed5 Mon Sep 17 00:00:00 2001 From: atamazov Date: Fri, 22 Dec 2023 00:02:07 +0300 Subject: [PATCH 40/50] fix-hiprtc-60(13) Formatting --- test/handle_test.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/handle_test.cpp b/test/handle_test.cpp index ad38dd12b5..be337d8c0b 100644 --- a/test/handle_test.cpp +++ b/test/handle_test.cpp @@ -29,12 +29,12 @@ #define WORKAROUND_SWDEV_257056_PCH_MISSING_MACROS 1 // https://gerrit-git.amd.com/c/compute/ec/clr/+/972441 -// "HIP_PACKAGE_VERSION_FLAT == 6001000000ULL" is for ROCm 6.1 RC where issue #2600 is not +// "HIP_PACKAGE_VERSION_FLAT == 6001000000ULL" is for ROCm 6.1 RC where issue #2600 is not // yet fixed in the compiler. In order to test such release candidates, we have to // override HIP version to 6.1.0. -#define WORKAROUND_ISSUE_2600 \ - ((HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL && HIP_PACKAGE_VERSION_FLAT < 6001000000ULL) \ - || HIP_PACKAGE_VERSION_FLAT == 6001000000ULL) +#define WORKAROUND_ISSUE_2600 \ + ((HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL && HIP_PACKAGE_VERSION_FLAT < 6001000000ULL) || \ + HIP_PACKAGE_VERSION_FLAT == 6001000000ULL) #include #include From f3e11f0055a2e0e5c51a575b5b76c7dcee30080a Mon Sep 17 00:00:00 2001 From: atamazov Date: Fri, 22 Dec 2023 17:46:42 +0300 Subject: [PATCH 41/50] [tests] Limit applicability of ConvFwdBiasActivAPI/ConvFwdBiasResAddActivTest.ConvFusedAPI --- test/gtest/fused_conv_bias_res_add_activ.cpp | 81 +++++++++++++++----- 1 file changed, 61 insertions(+), 20 deletions(-) diff --git a/test/gtest/fused_conv_bias_res_add_activ.cpp b/test/gtest/fused_conv_bias_res_add_activ.cpp index a3d066d82d..eb39bd0035 100644 --- a/test/gtest/fused_conv_bias_res_add_activ.cpp +++ b/test/gtest/fused_conv_bias_res_add_activ.cpp @@ -25,14 +25,44 @@ *******************************************************************************/ #include #include +#include +#if MIOPEN_USE_COMPOSABLEKERNEL +#include +#endif #include "tensor_util.hpp" #include "get_handle.hpp" #include "conv3d_test_case.hpp" +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG) + +#if MIOPEN_USE_COMPOSABLEKERNEL +#define WORAROUND_ISSUE_2533 1 +#endif + namespace conv_bias_act_res_add_fwd { +bool TestIsApplicable() +{ +#if MIOPEN_USE_COMPOSABLEKERNEL + const auto float_arg = miopen::GetStringEnv(ENV(MIOPEN_TEST_FLOAT_ARG)); + return +#if WORAROUND_ISSUE_2533 + miopen::solver::ck_utility::is_ck_whitelist(get_handle().GetDeviceName()) // +#else + /// \todo Check against specific ASCIs. +#endif + && (float_arg == "--half" // So far only test for fp16 is implemented. + || float_arg.empty()) // Empty when gtest is run without parameters. + && !miopen::IsDisabled( + ENV(MIOPEN_TEST_ALL)); // Not disabled when gtest is run without parameters. +#else + return false; +#endif +} + std::vector ConvTestConfigs() { // g, n, c, d, h, w, k, z, y, x, pad_x pad_y pad_z stri_x stri_y stri_z dia_x dia_y // dia_z @@ -57,6 +87,8 @@ struct ConvFwdBiasResAddFixture protected: void SetUp() override { + if(!TestIsApplicable()) + return; std::tie(algo, conv_config, alpha1, alpha2, tensor_layout) = GetParam(); @@ -94,6 +126,8 @@ struct ConvFwdBiasResAddFixture } void TearDown() override { + if(!TestIsApplicable()) + return; miopenDestroyActivationDescriptor(activ_desc); @@ -163,26 +197,33 @@ using namespace conv_bias_act_res_add_fwd; TEST_P(ConvFwdBiasResAddActivTest, ConvFusedAPI) { - auto status = miopenConvolutionBiasActivationForward(&get_handle(), - &alpha1, - &input.desc, - in_dev.get(), - &weights.desc, - wei_dev.get(), - &conv_desc, - algo, - nullptr, // workspace - 0ull, // workspace size - &alpha2, - &z.desc, - z_dev.get(), - &bias.desc, - bias_dev.get(), - activ_desc, - &output.desc, - out_dev.get()); - - EXPECT_EQ(status, miopenStatusSuccess); + if(TestIsApplicable()) + { + auto status = miopenConvolutionBiasActivationForward(&get_handle(), + &alpha1, + &input.desc, + in_dev.get(), + &weights.desc, + wei_dev.get(), + &conv_desc, + algo, + nullptr, // workspace + 0ull, // workspace size + &alpha2, + &z.desc, + z_dev.get(), + &bias.desc, + bias_dev.get(), + activ_desc, + &output.desc, + out_dev.get()); + + EXPECT_EQ(status, miopenStatusSuccess); + } + else + { + GTEST_SKIP(); + } } INSTANTIATE_TEST_SUITE_P(ConvFwdBiasActivAPI, From e59ae19ff3d850a898a92acdfedfe810fea3a09f Mon Sep 17 00:00:00 2001 From: atamazov Date: Sat, 23 Dec 2023 01:41:28 +0300 Subject: [PATCH 42/50] fix-hiprtc-60(17) [HIPRTC][PCH] Add and use miopen_limits.hpp instead of --- src/CMakeLists.txt | 1 + src/kernels/MIOpenCheckNumerics.cpp | 5 +- .../gpu_reference_kernel/fp8_naive_conv.cpp | 6 +- .../gpu_reference_kernel/naive_conv.cpp | 5 +- src/kernels/hip_float8.hpp | 21 ++++- src/kernels/miopen_limits.hpp | 94 +++++++++++++++++++ .../static_kernel_reduction_operator.hpp | 4 +- 7 files changed, 118 insertions(+), 18 deletions(-) create mode 100644 src/kernels/miopen_limits.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d4ca81c639..bb5f8c71e9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -389,6 +389,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN kernels/hip_float8.hpp kernels/inst_wrappers.inc kernels/miopen_cstdint.hpp + kernels/miopen_limits.hpp kernels/miopen_type_traits.hpp kernels/miopen_utility.hpp kernels/neuron.inc diff --git a/src/kernels/MIOpenCheckNumerics.cpp b/src/kernels/MIOpenCheckNumerics.cpp index ae67fcb941..036da97728 100644 --- a/src/kernels/MIOpenCheckNumerics.cpp +++ b/src/kernels/MIOpenCheckNumerics.cpp @@ -30,12 +30,11 @@ #include -/// \todo miopen_limits.hpp -#include - #define MIOPEN_ENABLE_F8_DEVICE_CODE 1 #include "hip_float8.hpp" +#include "miopen_limits.hpp" + struct Numerics { float sum; diff --git a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp index a818311442..724ef3d5af 100644 --- a/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp +++ b/src/kernels/gpu_reference_kernel/fp8_naive_conv.cpp @@ -32,11 +32,7 @@ #include "miopen_cstdint.hpp" #include "miopen_type_traits.hpp" - -/// \todo miopen_limits.hpp -#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS -#include -#endif +#include "miopen_limits.hpp" #define MIOPEN_ENABLE_F8_DEVICE_CODE 1 #include "hip_float8.hpp" diff --git a/src/kernels/gpu_reference_kernel/naive_conv.cpp b/src/kernels/gpu_reference_kernel/naive_conv.cpp index 37eb824c8d..d9a6c133d3 100644 --- a/src/kernels/gpu_reference_kernel/naive_conv.cpp +++ b/src/kernels/gpu_reference_kernel/naive_conv.cpp @@ -29,10 +29,7 @@ #endif #include "miopen_cstdint.hpp" - -#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS -#include // std::numeric_limits -#endif +#include "miopen_limits.hpp" #include "stride_array.hpp" diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp index e8fd5b8147..fb81848e5b 100644 --- a/src/kernels/hip_float8.hpp +++ b/src/kernels/hip_float8.hpp @@ -32,12 +32,13 @@ #endif // FP8 header version 0.4, 2021/05/11 +// Updated by atamazov 2023/12/22 #if defined __HIP_PLATFORM_AMD__ && MIOPEN_ENABLE_F8_DEVICE_CODE // MIOpen by default does not have device code in the regular compilation paths, // therefore, when this file is used from the host side, compilation takes much // longer. By guarding the __device__ directive we can control that such compilation // only happens for kernels which include this file. -#define MIOPEN_HIP_HOST_DEVICE __device__ +#define MIOPEN_HIP_HOST_DEVICE __host__ __device__ #else #define MIOPEN_HIP_HOST_DEVICE #endif @@ -519,6 +520,8 @@ class numeric_limits> return static_cast>(-1.0f) * miopen_f8::F8_Max>(); } + + static constexpr int digits = 4; }; template <> @@ -548,11 +551,16 @@ class numeric_limits> return static_cast>(-1.0f) * miopen_f8::F8_Max>(); } + + static constexpr int digits = 3; }; } // namespace miopen_f8 -#ifndef __HIPCC_RTC__ +#ifdef __HIPCC_RTC__ +// Assume that if hipRTC is used, then we get for F8 +// from the precompiled header. +#else namespace std { inline bool isfinite(miopen_f8::hip_f8 x) // NOLINT { @@ -574,6 +582,14 @@ inline bool isnan(miopen_f8::hip_f8 x) // NOLINT return x.is_nan(); } +} // namespace std +#endif + +namespace std { + +template +class numeric_limits; + template <> class numeric_limits> : public miopen_f8::numeric_limits> @@ -587,7 +603,6 @@ class numeric_limits> }; } // namespace std -#endif template struct hip_f8x4 diff --git a/src/kernels/miopen_limits.hpp b/src/kernels/miopen_limits.hpp new file mode 100644 index 0000000000..8f237d3c7f --- /dev/null +++ b/src/kernels/miopen_limits.hpp @@ -0,0 +1,94 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS + +#include + +#define MIOPEN_ENABLE_F8_DEVICE_CODE 1 +#include "hip_float8.hpp" + +namespace std { + +template +class numeric_limits; + +template <> +class numeric_limits +{ +public: + static constexpr __device__ float max() noexcept { return 0x1.FFFFFEp+127f; } + + static constexpr __device__ float min() noexcept { return 0x1p-126f; } +}; + +template <> +class numeric_limits<_Float16> +{ +public: + static constexpr __device__ _Float16 max() noexcept + { + return static_cast<_Float16>(0x1.FFCp+15f); + } + + static constexpr __device__ _Float16 min() noexcept { return static_cast<_Float16>(0x1p-14f); } +}; + +template <> +class numeric_limits +{ +public: + static +#if HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL + constexpr +#endif + __device__ hip_bfloat16 + max() noexcept + { + // data = 0x7F7F + return static_cast(0x1.FEp+127f); + } + + static +#if HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL + constexpr +#endif + __device__ hip_bfloat16 + min() noexcept + { + // data = 0x0080 + return static_cast(0x1p-14f); + } +}; + +} // namespace std + +#else + +#include + +#endif diff --git a/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp b/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp index e996e7a8b0..87e830954c 100644 --- a/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp +++ b/src/kernels/static_composable_kernel/include/utility/static_kernel_reduction_operator.hpp @@ -26,9 +26,7 @@ #ifndef CK_REDUCTION_OPERATOR_HPP #define CK_REDUCTION_OPERATOR_HPP -#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS -#include -#endif +#include "miopen_limits.hpp" #include "static_kernel_reduction_common.hpp" namespace ck { From d349c2d1e10c49ae4f3a9ba472aebb8e54ef44a0 Mon Sep 17 00:00:00 2001 From: atamazov Date: Sat, 23 Dec 2023 01:42:55 +0300 Subject: [PATCH 43/50] fix-hiprtc-60(18) [driver][tests] Fix F8 related build errors in gen_subnorm() --- driver/random.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/driver/random.hpp b/driver/random.hpp index 66bbfaab80..8731471c62 100644 --- a/driver/random.hpp +++ b/driver/random.hpp @@ -109,7 +109,9 @@ inline T gen_subnorm() if constexpr(!std::is_integral_v && !std::is_same_v && details::has_digits::value) { - using BitType = std::conditional_t; + using BitType = std::conditional_t>; static_assert(sizeof(T) == sizeof(BitType)); // -1 because ::digits counts the first implicit digit From d445a0b3ea3455554ec4a310cb68cc1232fdcd1d Mon Sep 17 00:00:00 2001 From: atamazov Date: Sat, 23 Dec 2023 23:23:19 +0300 Subject: [PATCH 44/50] fix-hiprtc-60(19) [HIPRTC][PCH] For now, use only standard to avoid possibility of correctness or performance regressions --- src/kernels/miopen_limits.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/kernels/miopen_limits.hpp b/src/kernels/miopen_limits.hpp index 8f237d3c7f..abfa653120 100644 --- a/src/kernels/miopen_limits.hpp +++ b/src/kernels/miopen_limits.hpp @@ -25,7 +25,11 @@ *******************************************************************************/ #pragma once -#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS +/// For now, use only standard to avoid possibility of correctnes +/// or performance regressions. +/// \todo Test and enable local implementation. +/// #ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS +#if 0 #include From 0d03807da5cb759d4b83e3fa0d7ada38e73be35d Mon Sep 17 00:00:00 2001 From: atamazov Date: Sat, 23 Dec 2023 23:31:25 +0300 Subject: [PATCH 45/50] fix-hiprtc-60(20) Tidy fixes --- src/kernels/hip_float8.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp index fb81848e5b..7ef6256808 100644 --- a/src/kernels/hip_float8.hpp +++ b/src/kernels/hip_float8.hpp @@ -561,6 +561,7 @@ class numeric_limits> // Assume that if hipRTC is used, then we get for F8 // from the precompiled header. #else +// NOLINTBEGIN(cert-dcl58-cpp) namespace std { inline bool isfinite(miopen_f8::hip_f8 x) // NOLINT { @@ -583,8 +584,10 @@ inline bool isnan(miopen_f8::hip_f8 x) // NOLINT } } // namespace std + // NOLINTEND(cert-dcl58-cpp) #endif +// NOLINTBEGIN(cert-dcl58-cpp) namespace std { template @@ -603,6 +606,7 @@ class numeric_limits> }; } // namespace std +// NOLINTEND(cert-dcl58-cpp) template struct hip_f8x4 From af0eed9b6a258a4a2913f00825794eaa845a3a3b Mon Sep 17 00:00:00 2001 From: atamazov Date: Sat, 23 Dec 2023 23:32:35 +0300 Subject: [PATCH 46/50] fix-hiprtc-60(21) [gemm] Fix runtime error with debug build - remove incorrect assertion --- src/gemm_v2.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp index 0be46017db..ec3fdd488f 100644 --- a/src/gemm_v2.cpp +++ b/src/gemm_v2.cpp @@ -115,8 +115,6 @@ static inline rocblas_computetype rocBlasComputeType_ex3(const miopen::GemmDescr static inline rocblas_datatype rocBlasComputeType(const miopen::GemmDescriptor& desc) { - // Complex compute types are only supported in newer version of the API - assert(desc.dataType == desc.a_cast_type && desc.dataType == desc.b_cast_type); if(desc.dataType == miopenInt8) return rocblas_datatype::rocblas_datatype_i32_r; else From 3badc2618a915c6a02aec7ea4517312145c5a657 Mon Sep 17 00:00:00 2001 From: atamazov Date: Sat, 23 Dec 2023 23:33:37 +0300 Subject: [PATCH 47/50] fix-hiprtc-60(22) [NFC] Remove incorrect comment from miopen_type_traits.hpp --- src/kernels/miopen_type_traits.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/kernels/miopen_type_traits.hpp b/src/kernels/miopen_type_traits.hpp index 2ef6aa9a16..1b2f8d4154 100644 --- a/src/kernels/miopen_type_traits.hpp +++ b/src/kernels/miopen_type_traits.hpp @@ -26,11 +26,6 @@ #pragma once #ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE -/// We need for std::remove_reference and std::remove_cv. -/// But also defines std::true_type, per Standard. -/// However the latter definition conflicts with -/// /opt/rocm/include/hip/amd_detail/amd_hip_vector_types.h, -/// which defines std::true_type as well (which is wrong). namespace std { From 0bcbe483f68589fc17027aed3df5c4927cbfc548 Mon Sep 17 00:00:00 2001 From: atamazov Date: Sun, 24 Dec 2023 01:52:34 +0300 Subject: [PATCH 48/50] fix-hiprtc-60(23) [NFC][HIPRTC][quality] Dismiss WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE, use MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS instead --- src/comgr.cpp | 1 - src/kernels/miopen_cstdint.hpp | 2 +- src/kernels/miopen_type_traits.hpp | 2 +- src/kernels/miopen_utility.hpp | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/comgr.cpp b/src/comgr.cpp index 5d4c71ec57..b5ed19fcbf 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -1292,7 +1292,6 @@ void BuildHip(const std::string& name, auto opts = miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit()); compiler::lc::RemoveOptionsUnwanted(opts); - opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE=1"); // Workaround for SWDEV-308073 #if HIP_PACKAGE_VERSION_MAJOR < 6 opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? #endif diff --git a/src/kernels/miopen_cstdint.hpp b/src/kernels/miopen_cstdint.hpp index ef1c6c5329..57d0d088b1 100644 --- a/src/kernels/miopen_cstdint.hpp +++ b/src/kernels/miopen_cstdint.hpp @@ -25,7 +25,7 @@ *******************************************************************************/ #pragma once -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE +#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed short int16_t; diff --git a/src/kernels/miopen_type_traits.hpp b/src/kernels/miopen_type_traits.hpp index 1b2f8d4154..352010e3ac 100644 --- a/src/kernels/miopen_type_traits.hpp +++ b/src/kernels/miopen_type_traits.hpp @@ -25,7 +25,7 @@ *******************************************************************************/ #pragma once -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE +#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS namespace std { diff --git a/src/kernels/miopen_utility.hpp b/src/kernels/miopen_utility.hpp index db74017a9a..584ffad278 100644 --- a/src/kernels/miopen_utility.hpp +++ b/src/kernels/miopen_utility.hpp @@ -25,7 +25,7 @@ *******************************************************************************/ #pragma once -#ifdef WORKAROUND_ISSUE_HIPRTC_TRUE_TYPE +#ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS #include "miopen_type_traits.hpp" // std::remove_reference From b0df12412525b7462c7b74ccb68858131bdaa41b Mon Sep 17 00:00:00 2001 From: atamazov Date: Sun, 24 Dec 2023 19:50:29 +0300 Subject: [PATCH 49/50] fix-hiprtc-60(24) [NFC][HIPRTC][quality] Add and use WORKAROUND_DONT_USE_CUSTOM_LIMITS --- src/comgr.cpp | 6 +++++- src/kernels/miopen_limits.hpp | 10 +++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/comgr.cpp b/src/comgr.cpp index b5ed19fcbf..971a60391e 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -1301,7 +1301,11 @@ void BuildHip(const std::string& name, opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1"); #endif opts.push_back("-DHIP_PACKAGE_VERSION_FLAT=" + std::to_string(HIP_PACKAGE_VERSION_FLAT)); - opts.push_back("-DMIOPEN_DONT_USE_HIP_RUNTIME_HEADERS=1"); + opts.push_back("-DMIOPEN_DONT_USE_HIP_RUNTIME_HEADERS"); + /// For now, use only standard to avoid possibility of + /// correctnes or performance regressions. + /// \todo Test and enable "custom" local implementation. + opts.push_back("-DWORKAROUND_DONT_USE_CUSTOM_LIMITS=1"); #if WORKAROUND_ISSUE_1431 if((StartsWith(target.Name(), "gfx10") || StartsWith(target.Name(), "gfx11")) && !miopen::comgr::IsWave64Enforced(opts)) diff --git a/src/kernels/miopen_limits.hpp b/src/kernels/miopen_limits.hpp index abfa653120..2a8f5e6178 100644 --- a/src/kernels/miopen_limits.hpp +++ b/src/kernels/miopen_limits.hpp @@ -25,11 +25,11 @@ *******************************************************************************/ #pragma once -/// For now, use only standard to avoid possibility of correctnes -/// or performance regressions. -/// \todo Test and enable local implementation. -/// #ifdef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS -#if 0 +#ifndef WORKAROUND_DO_NOT_USE_CUSTOM_LIMITS +#define WORKAROUND_DO_NOT_USE_CUSTOM_LIMITS 0 +#endif + +#if defined(MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS) && !WORKAROUND_DONT_USE_CUSTOM_LIMITS #include From 07e095e8b1215a5a95213d55937d8714690a7e79 Mon Sep 17 00:00:00 2001 From: atamazov Date: Sun, 24 Dec 2023 21:43:39 +0300 Subject: [PATCH 50/50] fix-hiprtc-60(26) Code clarity. leftover. --- .../composable_kernel/include/utility/data_type.hpp | 4 +--- test/handle_test.cpp | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/composable_kernel/composable_kernel/include/utility/data_type.hpp b/src/composable_kernel/composable_kernel/include/utility/data_type.hpp index 470c08fc00..01ae13a405 100644 --- a/src/composable_kernel/composable_kernel/include/utility/data_type.hpp +++ b/src/composable_kernel/composable_kernel/include/utility/data_type.hpp @@ -4,9 +4,7 @@ #include "statically_indexed_array.hpp" #include "miopen_cstdint.hpp" - -/// \todo miopen_limits.hpp -#include +#include "miopen_limits.hpp" namespace ck { diff --git a/test/handle_test.cpp b/test/handle_test.cpp index be337d8c0b..ba143d7ad1 100644 --- a/test/handle_test.cpp +++ b/test/handle_test.cpp @@ -32,8 +32,8 @@ // "HIP_PACKAGE_VERSION_FLAT == 6001000000ULL" is for ROCm 6.1 RC where issue #2600 is not // yet fixed in the compiler. In order to test such release candidates, we have to // override HIP version to 6.1.0. -#define WORKAROUND_ISSUE_2600 \ - ((HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL && HIP_PACKAGE_VERSION_FLAT < 6001000000ULL) || \ +#define WORKAROUND_ISSUE_2600 \ + ((HIP_PACKAGE_VERSION_FLAT >= 6000000000ULL && HIP_PACKAGE_VERSION_FLAT <= 6000999999ULL) || \ HIP_PACKAGE_VERSION_FLAT == 6001000000ULL) #include