diff --git a/CMakeLists.txt b/CMakeLists.txt index 3a15bf13e4..12c5d3f8e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,7 @@ if(NOT WIN32 AND NOT APPLE) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s") endif() -rocm_setup_version(VERSION 3.00.0) +rocm_setup_version(VERSION 3.1.0) list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ) include(TargetFlags) @@ -625,6 +625,19 @@ enable_cppcheck( knownConditionTrueFalse shadowFunction moduloofone + ################################################################### + # TODO Code Quality WORKAROUND ROCm 6.0 && + # Ubuntu 22.04 && cppcheck 2.12.1 update + ################################################################### + duplInheritedMember + constParameterCallback + constParameterReference + constParameterPointer + constVariableReference + constVariablePointer + useStlAlgorithm + uselessOverride + unusedScopedObject FORCE SOURCES addkernels/ diff --git a/Dockerfile b/Dockerfile index 8ebaa17969..e2e9af51c3 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:20.04 as miopen +FROM ubuntu:22.04 as miopen ARG DEBIAN_FRONTEND=noninteractive # Support multiarch @@ -18,17 +18,17 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg -RUN wget https://repo.radeon.com/amdgpu-install/5.7.1/ubuntu/focal/amdgpu-install_5.7.50701-1_all.deb --no-check-certificate +RUN wget https://repo.radeon.com/amdgpu-install/6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ - ./amdgpu-install_5.7.50701-1_all.deb + ./amdgpu-install_6.0.60000-1_all.deb # Add rocm repository -RUN export ROCM_APT_VER=5.7.1;\ +RUN export ROCM_APT_VER=6.0;\ echo $ROCM_APT_VER &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\ -sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list' -RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list" +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\ +sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list' +RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list" RUN amdgpu-install -y --usecase=rocm --no-dkms @@ -96,11 +96,17 @@ RUN tar zxvf /tmp/ccache.tar.gz -C /tmp/ && mkdir /tmp/ccache-${CCACHE_COMMIT}/b cd /tmp/ccache-${CCACHE_COMMIT}/build && \ cmake -DZSTD_FROM_INTERNET=ON -DHIREDIS_FROM_INTERNET=ON .. && make -j install && rm -rf /tmp/* RUN ccache -s + +# purge existing composable kernel installed with ROCm +# hence cannot use autoremove since it will remove more components +RUN apt-get update && \ +DEBIAN_FRONTEND=noninteractive apt-get purge -y --allow-unauthenticated \ + composablekernel-dev ARG COMPILER_LAUNCHER="" RUN if [ "$USE_FIN" = "ON" ]; then \ - rbuild prepare -s fin -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ + rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ else \ - rbuild prepare -s develop -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ + rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \ fi RUN ccache -s diff --git a/dev-requirements.txt b/dev-requirements.txt index 37edd602bc..049ed57f3d 100755 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,4 @@ ROCmSoftwarePlatform/rocm-recipes@d7b71f8ff71572833c8cf15b74279dd034e66f9d -f requirements.txt -danmar/cppcheck@2.9 +danmar/cppcheck@2.12.1 google/googletest@v1.14.0 diff --git a/docs/DebugAndLogging.md b/docs/DebugAndLogging.md index 8996580208..f862274ac5 100644 --- a/docs/DebugAndLogging.md +++ b/docs/DebugAndLogging.md @@ -94,7 +94,7 @@ Direct Solutions: * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11` - `ConvOclDirectFwd11x11`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN` - `ConvOclDirectFwdGen`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD` - `ConvOclDirectFwd`. -* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd`. +* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd1x1`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2` - `ConvOclBwdWrW2` (where n = `{1,2,4,8,16}`), and `ConvOclBwdWrW2NonTunable`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53` - `ConvOclBwdWrW53`. * `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1` - `ConvOclBwdWrW1x1` diff --git a/requirements.txt b/requirements.txt index ba97e47d43..2564b5a8e2 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,10 @@ sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On boost@1.83 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations " facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cmake -ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build -ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH=/opt/conda/envs/py_3.9 -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda +# ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build +ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0 ROCmSoftwarePlatform/eigen@3.4.0 ROCmSoftwarePlatform/frugally-deep@9683d557eb672ee2304f80f6682c51242d748a50 -ROCmSoftwarePlatform/composable_kernel@55a89c746eb6cf7973c47fb9b2635e0f73bd2fc2 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON - - - +ROCmSoftwarePlatform/composable_kernel@d0f355a31a341b0a885ff65231781f332a20cc5f -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index efefc77520..7f40650c1c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -770,7 +770,7 @@ elseif(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP") endif() if(ENABLE_HIP_WORKAROUNDS) # Workaround hip not setting its usage requirements correctly - target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_HCC__=1 ) + target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_AMD__=1 ) endif() # This is helpful for the tests target_link_libraries( MIOpen INTERFACE $ ) diff --git a/src/comgr.cpp b/src/comgr.cpp index 4040881e09..08c61efbc7 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -1293,8 +1293,10 @@ void BuildHip(const std::string& name, miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit()); compiler::lc::RemoveOptionsUnwanted(opts); opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073 - opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? - opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? +#if HIP_PACKAGE_VERSION_FLAT < 6000023494ULL + opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround? +#endif + opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround? #if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name())) opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1"); diff --git a/src/composable_kernel/.clang-tidy b/src/composable_kernel/.clang-tidy index 5c2b781687..8d0880abcf 100644 --- a/src/composable_kernel/.clang-tidy +++ b/src/composable_kernel/.clang-tidy @@ -1,3 +1,3 @@ CheckOptions: - key: bugprone-reserved-identifier.AllowedIdentifiers - value: '__HIP_PLATFORM_HCC__;__HIP_ROCclr__' + value: '__HIP_PLATFORM_AMD__;__HIP_ROCclr__' diff --git a/src/composable_kernel/cmake/ClangTidy.cmake b/src/composable_kernel/cmake/ClangTidy.cmake index 8de726de09..04ec12c326 100644 --- a/src/composable_kernel/cmake/ClangTidy.cmake +++ b/src/composable_kernel/cmake/ClangTidy.cmake @@ -149,7 +149,7 @@ function(clang_tidy_check TARGET) add_custom_target(${tidy_target} # for some targets clang-tidy not able to get information from .clang-tidy DEPENDS ${SOURCE} - COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_HCC__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml" + COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_AMD__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMENT "clang-tidy: Running clang-tidy on target ${SOURCE}..." ) diff --git a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp index f5fa35adfb..26d8645d61 100644 --- a/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp +++ b/src/composable_kernel/external/rocm/include/bfloat16_dev.hpp @@ -30,7 +30,7 @@ extern "C" { #endif -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define EXECUTION_SPECIFIER __device__ #else #define EXECUTION_SPECIFIER @@ -43,7 +43,7 @@ typedef union // Composable kernels are written in HIP language. The language doesnt support // ushort2.hi or ushort2.low. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ ushort ushortvec[2]; #endif // MIOPEN_BACKEND_HIP float f32; @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val) { cvt_bf16_fp32_t target_val; -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.ushortx2 = make_ushort2(0, src_val); #else target_val.ushortx2 = (ushort2)(0, src_val); @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F, // incrementing it causes it to become an exponent of 0xFF and a mantissa // of 0x00, which is Inf, the next higher value to the unrounded value. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1)); #else target_val.u32 += @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) #endif // MIOPEN_USE_RNE_BFLOAT16 } -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ return target_val.ushortvec[1]; #else return target_val.ushortx2.hi; diff --git a/src/convolution.cpp b/src/convolution.cpp index 5653477fe3..25bada0788 100644 --- a/src/convolution.cpp +++ b/src/convolution.cpp @@ -289,7 +289,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& } } - std::size_t out_c; + std::size_t out_c = 0; std::vector out_lens(spatial_dim + 2); auto out_spatial = boost::adaptors::slice(out_lens, 2, 2 + spatial_dim); diff --git a/src/kernels/bfloat16_dev.hpp b/src/kernels/bfloat16_dev.hpp index f5f24baa81..4b85a95975 100644 --- a/src/kernels/bfloat16_dev.hpp +++ b/src/kernels/bfloat16_dev.hpp @@ -30,7 +30,7 @@ extern "C" { #endif -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define EXECUTION_SPECIFIER __device__ #else #define EXECUTION_SPECIFIER @@ -43,7 +43,7 @@ typedef union cvt_bf16_fp32 // Composable kernels are written in HIP language. The language doesnt support // ushort2.hi or ushort2.low. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ ushort ushortvec[2]; #endif // MIOPEN_BACKEND_HIP float f32; @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val) { cvt_bf16_fp32_t target_val; -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.ushortx2 = make_ushort2(0, src_val); #else target_val.ushortx2 = (ushort2)(0, src_val); @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F, // incrementing it causes it to become an exponent of 0xFF and a mantissa // of 0x00, which is Inf, the next higher value to the unrounded value. -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1)); #else target_val.u32 += @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val) #endif // MIOPEN_USE_RNE_BFLOAT16 } -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ return target_val.ushortvec[1]; #else return target_val.ushortx2.hi; diff --git a/src/kernels/float_types.h b/src/kernels/float_types.h index 5406ba85ec..beded11d8d 100644 --- a/src/kernels/float_types.h +++ b/src/kernels/float_types.h @@ -34,7 +34,7 @@ #define FOUR 4 #define EIGHT 8 #if MIOPEN_USE_FP8 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT hip_f8 #define FLOAT_ACCUM float // HIP implements the correct operators for conversion @@ -58,7 +58,7 @@ #endif // MIOPEN_USE_FP8 #if MIOPEN_USE_BFP8 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT hip_f8 #define FLOAT_ACCUM float #else @@ -79,7 +79,7 @@ // #endif #endif // MIOPEN_USE_BFP8 -#ifndef __HIP_PLATFORM_HCC__ +#ifndef __HIP_PLATFORM_AMD__ #define _FLOAT2 PPCAT(_FLOAT, TWO) #define _FLOAT4 PPCAT(_FLOAT, FOUR) #define _FLOAT8 PPCAT(_FLOAT, EIGHT) @@ -99,19 +99,19 @@ #endif #if MIOPEN_USE_DOUBLE_ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT_ACCUM double #else #pragma OPENCL EXTENSION cl_khr_fp64 : enable #define _FLOAT_ACCUM double -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #define MAX_VAL_ACCUM DBL_MAX #else // MIOPEN_USE_DOUBLE_ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT_ACCUM float #else #define _FLOAT_ACCUM float -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #ifndef FLT_MAX #define MAX_VAL_ACCUM 3.402823466e+38F #else @@ -120,12 +120,12 @@ #endif // MIOPEN_USE_DOUBLE_ACCUM #if MIOPEN_USE_FP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT _Float16 -#else // __HIP_PLATFORM_HCC__ +#else // __HIP_PLATFORM_AMD__ #pragma OPENCL EXTENSION cl_khr_fp16 : enable #define _FLOAT half -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #define SIZEOF_FLOAT 2 // Max value for the main datatype #ifndef HALF_MAX @@ -136,11 +136,11 @@ #endif // MIOPEN_USE_FP16 #if MIOPEN_USE_FP32 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT float #else #define _FLOAT float -#endif // __HIP_PLATFORM_HCC__ +#endif // __HIP_PLATFORM_AMD__ #define SIZEOF_FLOAT 4 // Max value for the main datatype #ifndef FLT_MAX @@ -151,7 +151,7 @@ #endif // MIOPEN_USE_FP32 #if MIOPEN_USE_BFP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define FLOAT ushort #else #define _FLOAT ushort @@ -162,7 +162,7 @@ #endif // MIOPEN_USE_BFP16 #if MIOPEN_USE_FP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_FLOAT2ACCUM(x) (static_cast(x)) #define CVT_ACCUM2FLOAT(x) (static_cast(x)) #define CVT_INTEGRAL2ACCUM(x) (static_cast(x)) @@ -188,7 +188,7 @@ /// refactoring should be considered as nontrivial and requires /// a separate PR. Let's keep this historical stuff for now. /// --atamazov 30.08.2023 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_FLOAT2ACCUM(x) (static_cast(x)) #define CVT_ACCUM2FLOAT(x) (static_cast(x)) #define CVT_INTEGRAL2ACCUM(x) (static_cast(x)) @@ -202,7 +202,7 @@ #endif // MIOPEN_USE_FP32 #if MIOPEN_USE_BFP16 == 1 -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_FLOAT2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED #define CVT_ACCUM2FLOAT(x) MIOPEN_ERROR_NOT_IMLEMENTED #define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED @@ -232,7 +232,7 @@ #endif #if MIOPEN_USE_NATIVE_DATATYPE_ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #undef FLOAT_ACCUM #define FLOAT_ACCUM MIOPEN_ERROR_NOT_IMLEMENTED #else @@ -250,7 +250,7 @@ #define CVT_FP32_2ACCUM(x) (CVT_FP32_2FLOAT(x)) #undef CVT_INTEGRAL2ACCUM -#ifdef __HIP_PLATFORM_HCC__ +#ifdef __HIP_PLATFORM_AMD__ #define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED #else #if MIOPEN_USE_BFP16 == 1 diff --git a/src/kernels/hip_f8_impl.hpp b/src/kernels/hip_f8_impl.hpp index c8d49cd474..8ff8255ceb 100644 --- a/src/kernels/hip_f8_impl.hpp +++ b/src/kernels/hip_f8_impl.hpp @@ -27,7 +27,7 @@ // #include namespace miopen_hip_f8_impl { -#ifndef __HIP_PLATFORM_HCC__ +#ifndef __HIP_PLATFORM_AMD__ using hip_bfloat16 = bfloat16; using half = half_float::half; #endif diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp index 2947d6d713..d7ec875d17 100644 --- a/src/kernels/hip_float8.hpp +++ b/src/kernels/hip_float8.hpp @@ -29,7 +29,7 @@ #endif // FP8 header version 0.4, 2021/05/11 -#if defined __HIP_PLATFORM_HCC__ && MIOPEN_ENABLE_F8_DEVICE_CODE +#if defined __HIP_PLATFORM_AMD__ && MIOPEN_ENABLE_F8_DEVICE_CODE // MIOpen by default does not have device code in the regular compilation paths, // therefore, when this file is used from the host side, compilation takes much // longer. By guarding the __device__ directive we can control that such compilation diff --git a/src/rnn_api.cpp b/src/rnn_api.cpp index 4ad183f2bf..d60670fa4c 100644 --- a/src/rnn_api.cpp +++ b/src/rnn_api.cpp @@ -523,7 +523,7 @@ static void LogCmdRNN(const miopenTensorDescriptor_t* xDesc, const int seqLength, const RNNDir_t dir) { - if(miopen::IsLoggingCmd()) + if(miopen::IsLoggingCmd() && seqLength > 0) { std::string mode; miopenRNNMode_t rnnMode = miopen::deref(rnnDesc).rnnMode; diff --git a/test/handle_test.cpp b/test/handle_test.cpp index 409d2864a8..ade06c5447 100644 --- a/test/handle_test.cpp +++ b/test/handle_test.cpp @@ -28,6 +28,10 @@ /// \todo Create dedicated ticket and rename macro. #define WORKAROUND_SWDEV_257056_PCH_MISSING_MACROS 1 +// https://gerrit-git.amd.com/c/compute/ec/clr/+/972441 +#define WORKAROUND_ISSUE_2600 \ + (HIP_PACKAGE_VERSION_FLAT > 5007023384ULL && HIP_PACKAGE_VERSION_FLAT <= 6000023494ULL) + #include #include #include @@ -251,7 +255,7 @@ std::string WriteNop(kernel_type_t kern_type) void test_warnings(kernel_type_t kern_type) { auto&& h = get_handle(); -#if MIOPEN_BUILD_DEV +#if MIOPEN_BUILD_DEV && !WORKAROUND_ISSUE_2600 if(kern_type == miopenOpenCLKernelType) { EXPECT(throws([&] { diff --git a/test/na_train.cpp b/test/na_train.cpp index c6d585964c..e776f4414e 100644 --- a/test/na_train.cpp +++ b/test/na_train.cpp @@ -804,7 +804,7 @@ struct na_fusion_driver : test_driver std::size_t input_n, input_c, input_h, input_w; std::tie(input_n, input_c, input_h, input_w) = miopen::tien<4>(input.desc.GetLengths()); - this->tolerance = 80 * float(input.desc.GetElementSize()); + this->tolerance = 80 * double(input.desc.GetElementSize()); ptr_activdesc = GetManagedActivDesc(); miopenSetActivationDescriptor(ptr_activdesc.get(), activ_mode, alpha, beta, gamma); auto&& handle = get_handle();