From 2c9fe96891d0d7819e62f62b4eb40e3d50cefac7 Mon Sep 17 00:00:00 2001 From: atamazov Date: Wed, 27 Dec 2023 03:24:21 +0300 Subject: [PATCH] Bump MIOpen version to 3.0.1 and update CI docker (partial cherry-pick of #2519) Differences from #2519: - rocm_setup_version(VERSION 3.1.0) -> rocm_setup_version(VERSION 3.0.1) - Changes of requirements.txt REVERTED (cherry picked from commit 7da72bc7f8c77638317339582a85f2972031cc88 and EDITED) # RESOLVED Conflicts: # Dockerfile # dev-requirements.txt # requirements.txt -- REVERTED --- CMakeLists.txt | 15 +++++++- Dockerfile | 24 ++++++++----- dev-requirements.txt | 2 +- docs/DebugAndLogging.md | 2 +- src/CMakeLists.txt | 2 +- src/comgr.cpp | 6 ++-- src/composable_kernel/.clang-tidy | 2 +- src/composable_kernel/cmake/ClangTidy.cmake | 2 +- .../external/rocm/include/bfloat16_dev.hpp | 10 +++--- src/convolution.cpp | 2 +- src/kernels/bfloat16_dev.hpp | 10 +++--- src/kernels/float_types.h | 36 +++++++++---------- src/kernels/hip_f8_impl.hpp | 2 +- src/kernels/hip_float8.hpp | 2 +- src/rnn_api.cpp | 2 +- test/handle_test.cpp | 6 +++- test/na_train.cpp | 2 +- 17 files changed, 76 insertions(+), 51 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 46cf416782..8c4f0fb2e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,7 +99,7 @@ if(NOT WIN32 AND NOT APPLE) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s") endif() -rocm_setup_version(VERSION 3.00.0) +rocm_setup_version(VERSION 3.0.1) list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ) include(TargetFlags) @@ -761,6 +761,19 @@ enable_cppcheck( knownConditionTrueFalse shadowFunction moduloofone + ################################################################### + # TODO Code Quality WORKAROUND ROCm 6.0 && + # Ubuntu 22.04 && cppcheck 2.12.1 update + ################################################################### + duplInheritedMember + constParameterCallback + constParameterReference + constParameterPointer + constVariableReference + constVariablePointer + useStlAlgorithm + uselessOverride + unusedScopedObject FORCE SOURCES addkernels/ diff --git a/Dockerfile b/Dockerfile index d958879d3c..36e4dc484e 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:20.04 as miopen +FROM ubuntu:22.04 as miopen ARG DEBIAN_FRONTEND=noninteractive # Support multiarch @@ -18,17 +18,17 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg -RUN wget https://repo.radeon.com/amdgpu-install/5.7/ubuntu/focal/amdgpu-install_5.7.50700-1_all.deb --no-check-certificate +RUN wget https://repo.radeon.com/amdgpu-install/6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ - ./amdgpu-install_5.7.50700-1_all.deb + ./amdgpu-install_6.0.60000-1_all.deb # Add rocm repository -RUN export ROCM_APT_VER=5.7;\ +RUN export ROCM_APT_VER=6.0;\ echo $ROCM_APT_VER &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list' -RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list" +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\ +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list' +RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list" RUN amdgpu-install -y --usecase=rocm --no-dkms @@ -94,11 +94,17 @@ RUN rm -rf /tmp/ccache* && mkdir /tmp/ccache && wget https://github.com/ccache/c cd /tmp/ccache-${CCACHE_COMMIT}/build && \ cmake -DZSTD_FROM_INTERNET=ON -DHIREDIS_FROM_INTERNET=ON .. && make -j install && rm -rf /tmp/* RUN ccache -s + +# purge existing composable kernel installed with ROCm +# hence cannot use autoremove since it will remove more components +RUN apt-get update && \ +DEBIAN_FRONTEND=noninteractive apt-get purge -y --allow-unauthenticated \ + composablekernel-dev ARG COMPILER_LAUNCHER="" RUN if [ "$USE_FIN" = "ON" ]; then \ - rbuild prepare -s fin -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ + rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ else \ - rbuild prepare -s develop -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ + rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ fi RUN ccache -s diff --git a/dev-requirements.txt b/dev-requirements.txt index 86ee06ae33..ddc6212455 100755 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,3 +1,3 @@ ROCmSoftwarePlatform/rocm-recipes -f requirements.txt -danmar/cppcheck@2.9 +danmar/cppcheck@2.12.1 diff --git a/docs/DebugAndLogging.md b/docs/DebugAndLogging.md index 3ae5db123a..b1e497efcc 100644 --- a/docs/DebugAndLogging.md +++ b/docs/DebugAndLogging.md @@ -94,7 +94,7 @@ Direct Solutions: * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11` - `ConvOclDirectFwd11x11`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN` - `ConvOclDirectFwdGen`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD` - `ConvOclDirectFwd`. -* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd`. +* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd1x1`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2` - `ConvOclBwdWrW2` (where n = `{1,2,4,8,16}`), and `ConvOclBwdWrW2NonTunable`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53` - `ConvOclBwdWrW53`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1` - `ConvOclBwdWrW1x1` diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2787733356..218d715a50 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -778,7 +778,7 @@ elseif(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP") endif() if(ENABLE_HIP_WORKAROUNDS) # Workaround hip not setting its usage requirements correctly - target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_HCC__=1 ) + target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_AMD__=1 ) endif() # This is helpful for the tests target_link_libraries( MIOpen INTERFACE $ ) diff --git a/src/comgr.cpp b/src/comgr.cpp index 18f41c862d..44d38d6f3d 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -1293,8 +1293,10 @@ void BuildHip(const std::string& name, miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit()); compiler::lc::RemoveOptionsUnwanted(opts); opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073 - opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? - opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? +#if HIP_PACKAGE_VERSION_FLAT < 6000023494ULL + opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? +#endif + opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? #if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name())) opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1"); diff --git a/src/composable_kernel/.clang-tidy b/src/composable_kernel/.clang-tidy index 5c2b781687..8d0880abcf 100644 --- a/src/composable_kernel/.clang-tidy +++ b/src/composable_kernel/.clang-tidy @@ -1,3 +1,3 @@ CheckOptions: - key: bugprone-reserved-identifier.AllowedIdentifiers - value: '__HIP_PLATFORM_HCC__;__HIP_ROCclr__' + value: '__HIP_PLATFORM_AMD__;__HIP_ROCclr__' diff --git a/src/composable_kernel/cmake/ClangTidy.cmake b/src/composable_kernel/cmake/ClangTidy.cmake index 01b348c458..7c17f91f1f 100644 --- a/src/composable_kernel/cmake/ClangTidy.cmake +++ b/src/composable_kernel/cmake/ClangTidy.cmake @@ -149,7 +149,7 @@ function(clang_tidy_check TARGET) add_custom_target(${tidy_target} # for some targets clang-tidy not able to get information from .clang-tidy DEPENDS ${SOURCE} - COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_HCC__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml" + COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_AMD__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMENT "clang-tidy: Running clang-tidy on target ${SOURCE}..." ) diff --git a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp index 52d00346cf..02e8da33a9 100644 --- a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp +++ b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp @@ -30,7 +30,7 @@ extern "C" { #endif -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define EXECUTION_SPECIFIER __device__ #else #define EXECUTION_SPECIFIER @@ -43,7 +43,7 @@ typedef union // Composable kernels are written in HIP language. The language doesnt support // ushort2.hi or ushort2.low. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ ushort ushortvec[2]; #endif // MIOPEN_BACKEND_HIP float f32; @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val) { cvt_bf16_fp32_t target_val; -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.ushortx2 = make_ushort2(0, src_val); #else target_val.ushortx2 = (ushort2)(0, src_val); @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F, // incrementing it causes it to become an exponent of 0xFF and a mantissa // of 0x00, which is Inf, the next higher value to the unrounded value. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1)); #else target_val.u32 += @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) #endif // MIOPEN_USE_RNE_BFLOAT16 } -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ return target_val.ushortvec[1]; #else return target_val.ushortx2.hi; diff --git a/src/convolution.cpp b/src/convolution.cpp index ac7c28fdc4..3c8e36e43a 100644 --- a/src/convolution.cpp +++ b/src/convolution.cpp @@ -289,7 +289,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& } } - std::size_t out_c; + std::size_t out_c = 0; std::vector out_lens(spatial_dim + 2); auto out_spatial = boost::adaptors::slice(out_lens, 2, 2 + spatial_dim); diff --git a/src/kernels/bfloat16_dev.hpp b/src/kernels/bfloat16_dev.hpp index c1a77c90db..2244385686 100644 --- a/src/kernels/bfloat16_dev.hpp +++ b/src/kernels/bfloat16_dev.hpp @@ -30,7 +30,7 @@ extern "C" { #endif -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define EXECUTION_SPECIFIER __device__ #else #define EXECUTION_SPECIFIER @@ -43,7 +43,7 @@ typedef union cvt_bf16_fp32 // Composable kernels are written in HIP language. The language doesnt support // ushort2.hi or ushort2.low. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ ushort ushortvec[2]; #endif // MIOPEN_BACKEND_HIP float f32; @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val) { cvt_bf16_fp32_t target_val; -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.ushortx2 = make_ushort2(0, src_val); #else target_val.ushortx2 = (ushort2)(0, src_val); @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F, // incrementing it causes it to become an exponent of 0xFF and a mantissa // of 0x00, which is Inf, the next higher value to the unrounded value. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1)); #else target_val.u32 += @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) #endif // MIOPEN_USE_RNE_BFLOAT16 } -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ return target_val.ushortvec[1]; #else return target_val.ushortx2.hi; diff --git a/src/kernels/float_types.h b/src/kernels/float_types.h index 5406ba85ec..beded11d8d 100644 --- a/src/kernels/float_types.h +++ b/src/kernels/float_types.h @@ -34,7 +34,7 @@ #define FOUR 4 #define EIGHT 8 #if MIOPEN_USE_FP8 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT hip_f8 #define FLOAT_ACCUM float // HIP implements the correct operators for conversion @@ -58,7 +58,7 @@ #endif // MIOPEN_USE_FP8 #if MIOPEN_USE_BFP8 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT hip_f8 #define FLOAT_ACCUM float #else @@ -79,7 +79,7 @@ // #endif #endif // MIOPEN_USE_BFP8 -#ifndef __HIP_PLATFORM_HCC__ +#ifndef __HIP_PLATFORM_AMD__ #define _FLOAT2 PPCAT(_FLOAT, TWO) #define _FLOAT4 PPCAT(_FLOAT, FOUR) #define _FLOAT8 PPCAT(_FLOAT, EIGHT) @@ -99,19 +99,19 @@ #endif #if MIOPEN_USE_DOUBLE_ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT_ACCUM double #else #pragma OPENCL EXTENSION cl_khr_fp64 : enable #define _FLOAT_ACCUM double -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #define MAX_VAL_ACCUM DBL_MAX #else // MIOPEN_USE_DOUBLE_ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT_ACCUM float #else #define _FLOAT_ACCUM float -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #ifndef FLT_MAX #define MAX_VAL_ACCUM 3.402823466e+38F #else @@ -120,12 +120,12 @@ #endif // MIOPEN_USE_DOUBLE_ACCUM #if MIOPEN_USE_FP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT _Float16 -#else // __HIP_PLATFORM_HCC__ +#else // __HIP_PLATFORM_AMD__ #pragma OPENCL EXTENSION cl_khr_fp16 : enable #define _FLOAT half -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #define SIZEOF_FLOAT 2 // Max value for the main datatype #ifndef HALF_MAX @@ -136,11 +136,11 @@ #endif // MIOPEN_USE_FP16 #if MIOPEN_USE_FP32 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT float #else #define _FLOAT float -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #define SIZEOF_FLOAT 4 // Max value for the main datatype #ifndef FLT_MAX @@ -151,7 +151,7 @@ #endif // MIOPEN_USE_FP32 #if MIOPEN_USE_BFP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT ushort #else #define _FLOAT ushort @@ -162,7 +162,7 @@ #endif // MIOPEN_USE_BFP16 #if MIOPEN_USE_FP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_FLOAT2ACCUM(x) (static_cast(x)) #define CVT_ACCUM2FLOAT(x) (static_cast(x)) #define CVT_INTEGRAL2ACCUM(x) (static_cast(x)) @@ -188,7 +188,7 @@ /// refactoring should be considered as nontrivial and requires /// a separate PR. Let's keep this historical stuff for now. /// --atamazov 30.08.2023 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_FLOAT2ACCUM(x) (static_cast(x)) #define CVT_ACCUM2FLOAT(x) (static_cast(x)) #define CVT_INTEGRAL2ACCUM(x) (static_cast(x)) @@ -202,7 +202,7 @@ #endif // MIOPEN_USE_FP32 #if MIOPEN_USE_BFP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_FLOAT2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED #define CVT_ACCUM2FLOAT(x) MIOPEN_ERROR_NOT_IMLEMENTED #define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED @@ -232,7 +232,7 @@ #endif #if MIOPEN_USE_NATIVE_DATATYPE_ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #undef FLOAT_ACCUM #define FLOAT_ACCUM MIOPEN_ERROR_NOT_IMLEMENTED #else @@ -250,7 +250,7 @@ #define CVT_FP32_2ACCUM(x) (CVT_FP32_2FLOAT(x)) #undef CVT_INTEGRAL2ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED #else #if MIOPEN_USE_BFP16 == 1 diff --git a/src/kernels/hip_f8_impl.hpp b/src/kernels/hip_f8_impl.hpp index 45edeffc4b..23877e07ec 100644 --- a/src/kernels/hip_f8_impl.hpp +++ b/src/kernels/hip_f8_impl.hpp @@ -27,7 +27,7 @@ // #include namespace miopen_hip_f8_impl { -#ifndef __HIP_PLATFORM_HCC__ +#ifndef __HIP_PLATFORM_AMD__ using hip_bfloat16 = bfloat16; using half = half_float::half; #endif diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp index 7bfac93ecd..6886c3fc1c 100644 --- a/src/kernels/hip_float8.hpp +++ b/src/kernels/hip_float8.hpp @@ -29,7 +29,7 @@ #endif // FP8 header version 0.4, 2021/05/11 -#if defined __HIP_PLATFORM_HCC__ && MIOPEN_ENABLE_F8_DEVICE_CODE +#if defined __HIP_PLATFORM_AMD__ && MIOPEN_ENABLE_F8_DEVICE_CODE // MIOpen by default does not have device code in the regular compilation paths, // therefore, when this file is used from the host side, compilation takes much // longer. By guarding the __device__ directive we can control that such compilation diff --git a/src/rnn_api.cpp b/src/rnn_api.cpp index 8dce8f364e..e04dc5c2ea 100644 --- a/src/rnn_api.cpp +++ b/src/rnn_api.cpp @@ -529,7 +529,7 @@ static void LogCmdRNN(const miopenTensorDescriptor_t* xDesc, const int seqLength, const RNNDir_t dir) { - if(miopen::IsLoggingCmd()) + if(miopen::IsLoggingCmd() && seqLength > 0) { std::string mode; miopenRNNMode_t rnnMode = miopen::deref(rnnDesc).rnnMode; diff --git a/test/handle_test.cpp b/test/handle_test.cpp index 1ab319d6fb..2fcd07a2cd 100644 --- a/test/handle_test.cpp +++ b/test/handle_test.cpp @@ -28,6 +28,10 @@ /// \todo Create dedicated ticket and rename macro. #define WORKAROUND_SWDEV_257056_PCH_MISSING_MACROS 1 +// https://gerrit-git.amd.com/c/compute/ec/clr/+/972441 +#define WORKAROUND_ISSUE_2600 \ + (HIP_PACKAGE_VERSION_FLAT > 5007023384ULL && HIP_PACKAGE_VERSION_FLAT <= 6000023494ULL) + #include #include #include @@ -207,7 +211,7 @@ std::string WriteNop(kernel_type_t kern_type) void test_warnings(kernel_type_t kern_type) { auto&& h = get_handle(); -#if MIOPEN_BUILD_DEV +#if MIOPEN_BUILD_DEV && !WORKAROUND_ISSUE_2600 if(kern_type == miopenOpenCLKernelType) EXPECT(throws([&] { h.AddKernel("GEMM", "", WriteNop(kern_type), "write", {1, 1, 1}, {1, 1, 1}, ""); diff --git a/test/na_train.cpp b/test/na_train.cpp index 24529058ed..80f3fc781f 100644 --- a/test/na_train.cpp +++ b/test/na_train.cpp @@ -802,7 +802,7 @@ struct na_fusion_driver : test_driver std::size_t input_n, input_c, input_h, input_w; std::tie(input_n, input_c, input_h, input_w) = miopen::tien<4>(input.desc.GetLengths()); - this->tolerance = 80 * float(input.desc.GetElementSize()); + this->tolerance = 80 * double(input.desc.GetElementSize()); ptr_activdesc = GetManagedActivDesc(); miopenSetActivationDescriptor(ptr_activdesc.get(), activ_mode, alpha, beta, gamma); auto&& handle = get_handle();