Skip to content

Commit

Permalink
Bump MIOpen version to 3.1.0 and update CI docker (ROCm#2519)
Browse files Browse the repository at this point in the history
  • Loading branch information
junliume authored Dec 20, 2023
1 parent 7a7d288 commit 7da72bc
Show file tree
Hide file tree
Showing 18 changed files with 79 additions and 57 deletions.
15 changes: 14 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ if(NOT WIN32 AND NOT APPLE)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
endif()

rocm_setup_version(VERSION 3.00.0)
rocm_setup_version(VERSION 3.1.0)

list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake )
include(TargetFlags)
Expand Down Expand Up @@ -625,6 +625,19 @@ enable_cppcheck(
knownConditionTrueFalse
shadowFunction
moduloofone
###################################################################
# TODO(code quality): WORKAROUND for the update to ROCm 6.0,
# Ubuntu 22.04 and cppcheck 2.12.1
###################################################################
duplInheritedMember
constParameterCallback
constParameterReference
constParameterPointer
constVariableReference
constVariablePointer
useStlAlgorithm
uselessOverride
unusedScopedObject
FORCE
SOURCES
addkernels/
Expand Down
24 changes: 15 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:20.04 as miopen
FROM ubuntu:22.04 as miopen
ARG DEBIAN_FRONTEND=noninteractive

# Support multiarch
Expand All @@ -18,17 +18,17 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg

RUN wget https://repo.radeon.com/amdgpu-install/5.7.1/ubuntu/focal/amdgpu-install_5.7.50701-1_all.deb --no-check-certificate
RUN wget https://repo.radeon.com/amdgpu-install/6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
./amdgpu-install_5.7.50701-1_all.deb
./amdgpu-install_6.0.60000-1_all.deb

# Add rocm repository
RUN export ROCM_APT_VER=5.7.1;\
RUN export ROCM_APT_VER=6.0;\
echo $ROCM_APT_VER &&\
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list'
RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list'
RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list"

RUN amdgpu-install -y --usecase=rocm --no-dkms

Expand Down Expand Up @@ -96,11 +96,17 @@ RUN tar zxvf /tmp/ccache.tar.gz -C /tmp/ && mkdir /tmp/ccache-${CCACHE_COMMIT}/b
cd /tmp/ccache-${CCACHE_COMMIT}/build && \
cmake -DZSTD_FROM_INTERNET=ON -DHIREDIS_FROM_INTERNET=ON .. && make -j install && rm -rf /tmp/*
RUN ccache -s

# Purge the existing composable kernel package that was installed with ROCm.
# autoremove cannot be used here because it would remove more components.
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get purge -y --allow-unauthenticated \
composablekernel-dev
ARG COMPILER_LAUNCHER=""
RUN if [ "$USE_FIN" = "ON" ]; then \
rbuild prepare -s fin -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
else \
rbuild prepare -s develop -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
fi

RUN ccache -s
Expand Down
2 changes: 1 addition & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ROCmSoftwarePlatform/rocm-recipes@d7b71f8ff71572833c8cf15b74279dd034e66f9d
-f requirements.txt
danmar/cppcheck@2.9
danmar/cppcheck@2.12.1
google/googletest@v1.14.0
2 changes: 1 addition & 1 deletion docs/DebugAndLogging.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ Direct Solutions:
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11` - `ConvOclDirectFwd11x11`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN` - `ConvOclDirectFwdGen`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD` - `ConvOclDirectFwd`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd1x1`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2` - `ConvOclBwdWrW2<n>` (where n = `{1,2,4,8,16}`), and `ConvOclBwdWrW2NonTunable`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53` - `ConvOclBwdWrW53`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1` - `ConvOclBwdWrW1x1`.
Expand Down
9 changes: 3 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On
boost@1.83 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations "
facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cmake
ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build
ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH=/opt/conda/envs/py_3.9 -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda
# ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build
ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda
nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off
ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0
ROCmSoftwarePlatform/eigen@3.4.0
ROCmSoftwarePlatform/frugally-deep@9683d557eb672ee2304f80f6682c51242d748a50
ROCmSoftwarePlatform/composable_kernel@55a89c746eb6cf7973c47fb9b2635e0f73bd2fc2 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON



ROCmSoftwarePlatform/composable_kernel@d0f355a31a341b0a885ff65231781f332a20cc5f -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ elseif(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP")
endif()
if(ENABLE_HIP_WORKAROUNDS)
# Workaround hip not setting its usage requirements correctly
target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_HCC__=1 )
target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_AMD__=1 )
endif()
# This is helpful for the tests
target_link_libraries( MIOpen INTERFACE $<BUILD_INTERFACE:hip::device> )
Expand Down
6 changes: 4 additions & 2 deletions src/comgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1293,8 +1293,10 @@ void BuildHip(const std::string& name,
miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit());
compiler::lc::RemoveOptionsUnwanted(opts);
opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073
opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround?
opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround?
#if HIP_PACKAGE_VERSION_FLAT < 6000023494ULL
opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround?
#endif
opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround?
#if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT
if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name()))
opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1");
Expand Down
2 changes: 1 addition & 1 deletion src/composable_kernel/.clang-tidy
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CheckOptions:
- key: bugprone-reserved-identifier.AllowedIdentifiers
value: '__HIP_PLATFORM_HCC__;__HIP_ROCclr__'
value: '__HIP_PLATFORM_AMD__;__HIP_ROCclr__'
2 changes: 1 addition & 1 deletion src/composable_kernel/cmake/ClangTidy.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ function(clang_tidy_check TARGET)
add_custom_target(${tidy_target}
# for some targets clang-tidy not able to get information from .clang-tidy
DEPENDS ${SOURCE}
COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_HCC__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml"
COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_AMD__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMENT "clang-tidy: Running clang-tidy on target ${SOURCE}..."
)
Expand Down
10 changes: 5 additions & 5 deletions src/composable_kernel/external/rocm/include/bfloat16_dev.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
extern "C" {
#endif

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define EXECUTION_SPECIFIER __device__
#else
#define EXECUTION_SPECIFIER
Expand All @@ -43,7 +43,7 @@ typedef union

// Composable kernels are written in the HIP language. The language doesn't support
// ushort2.hi or ushort2.low.
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
ushort ushortvec[2];
#endif // MIOPEN_BACKEND_HIP
float f32;
Expand All @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val)
{
cvt_bf16_fp32_t target_val;

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
target_val.ushortx2 = make_ushort2(0, src_val);
#else
target_val.ushortx2 = (ushort2)(0, src_val);
Expand Down Expand Up @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
// When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
// incrementing it causes it to become an exponent of 0xFF and a mantissa
// of 0x00, which is Inf, the next higher value to the unrounded value.
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1));
#else
target_val.u32 +=
Expand All @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
#endif // MIOPEN_USE_RNE_BFLOAT16
}

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
return target_val.ushortvec[1];
#else
return target_val.ushortx2.hi;
Expand Down
2 changes: 1 addition & 1 deletion src/convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor&
}
}

std::size_t out_c;
std::size_t out_c = 0;
std::vector<std::size_t> out_lens(spatial_dim + 2);

auto out_spatial = boost::adaptors::slice(out_lens, 2, 2 + spatial_dim);
Expand Down
10 changes: 5 additions & 5 deletions src/kernels/bfloat16_dev.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
extern "C" {
#endif

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define EXECUTION_SPECIFIER __device__
#else
#define EXECUTION_SPECIFIER
Expand All @@ -43,7 +43,7 @@ typedef union cvt_bf16_fp32

// Composable kernels are written in the HIP language. The language doesn't support
// ushort2.hi or ushort2.low.
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
ushort ushortvec[2];
#endif // MIOPEN_BACKEND_HIP
float f32;
Expand All @@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val)
{
cvt_bf16_fp32_t target_val;

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
target_val.ushortx2 = make_ushort2(0, src_val);
#else
target_val.ushortx2 = (ushort2)(0, src_val);
Expand Down Expand Up @@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
// When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
// incrementing it causes it to become an exponent of 0xFF and a mantissa
// of 0x00, which is Inf, the next higher value to the unrounded value.
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1));
#else
target_val.u32 +=
Expand All @@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
#endif // MIOPEN_USE_RNE_BFLOAT16
}

#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
return target_val.ushortvec[1];
#else
return target_val.ushortx2.hi;
Expand Down
36 changes: 18 additions & 18 deletions src/kernels/float_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#define FOUR 4
#define EIGHT 8
#if MIOPEN_USE_FP8 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT hip_f8<miopen_f8::hip_f8_type::fp8>
#define FLOAT_ACCUM float
// HIP implements the correct operators for conversion
Expand All @@ -58,7 +58,7 @@
#endif // MIOPEN_USE_FP8

#if MIOPEN_USE_BFP8 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT hip_f8<miopen_f8::hip_f8_type::bf8>
#define FLOAT_ACCUM float
#else
Expand All @@ -79,7 +79,7 @@
// #endif
#endif // MIOPEN_USE_BFP8

#ifndef __HIP_PLATFORM_HCC__
#ifndef __HIP_PLATFORM_AMD__
#define _FLOAT2 PPCAT(_FLOAT, TWO)
#define _FLOAT4 PPCAT(_FLOAT, FOUR)
#define _FLOAT8 PPCAT(_FLOAT, EIGHT)
Expand All @@ -99,19 +99,19 @@
#endif

#if MIOPEN_USE_DOUBLE_ACCUM
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT_ACCUM double
#else
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#define _FLOAT_ACCUM double
#endif // __HIP_PLATFORM_HCC__
#endif // __HIP_PLATFORM_AMD__
#define MAX_VAL_ACCUM DBL_MAX
#else // MIOPEN_USE_DOUBLE_ACCUM
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT_ACCUM float
#else
#define _FLOAT_ACCUM float
#endif // __HIP_PLATFORM_HCC__
#endif // __HIP_PLATFORM_AMD__
#ifndef FLT_MAX
#define MAX_VAL_ACCUM 3.402823466e+38F
#else
Expand All @@ -120,12 +120,12 @@
#endif // MIOPEN_USE_DOUBLE_ACCUM

#if MIOPEN_USE_FP16 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT _Float16
#else // __HIP_PLATFORM_HCC__
#else // __HIP_PLATFORM_AMD__
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define _FLOAT half
#endif // __HIP_PLATFORM_HCC__
#endif // __HIP_PLATFORM_AMD__
#define SIZEOF_FLOAT 2
// Max value for the main datatype
#ifndef HALF_MAX
Expand All @@ -136,11 +136,11 @@
#endif // MIOPEN_USE_FP16

#if MIOPEN_USE_FP32 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT float
#else
#define _FLOAT float
#endif // __HIP_PLATFORM_HCC__
#endif // __HIP_PLATFORM_AMD__
#define SIZEOF_FLOAT 4
// Max value for the main datatype
#ifndef FLT_MAX
Expand All @@ -151,7 +151,7 @@
#endif // MIOPEN_USE_FP32

#if MIOPEN_USE_BFP16 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define FLOAT ushort
#else
#define _FLOAT ushort
Expand All @@ -162,7 +162,7 @@
#endif // MIOPEN_USE_BFP16

#if MIOPEN_USE_FP16 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define CVT_FLOAT2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
#define CVT_ACCUM2FLOAT(x) (static_cast<FLOAT>(x))
#define CVT_INTEGRAL2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
Expand All @@ -188,7 +188,7 @@
/// refactoring should be considered as nontrivial and requires
/// a separate PR. Let's keep this historical stuff for now.
/// --atamazov 30.08.2023
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define CVT_FLOAT2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
#define CVT_ACCUM2FLOAT(x) (static_cast<FLOAT>(x))
#define CVT_INTEGRAL2ACCUM(x) (static_cast<FLOAT_ACCUM>(x))
Expand All @@ -202,7 +202,7 @@
#endif // MIOPEN_USE_FP32

#if MIOPEN_USE_BFP16 == 1
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define CVT_FLOAT2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
#define CVT_ACCUM2FLOAT(x) MIOPEN_ERROR_NOT_IMLEMENTED
#define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
Expand Down Expand Up @@ -232,7 +232,7 @@
#endif

#if MIOPEN_USE_NATIVE_DATATYPE_ACCUM
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#undef FLOAT_ACCUM
#define FLOAT_ACCUM MIOPEN_ERROR_NOT_IMLEMENTED
#else
Expand All @@ -250,7 +250,7 @@
#define CVT_FP32_2ACCUM(x) (CVT_FP32_2FLOAT(x))

#undef CVT_INTEGRAL2ACCUM
#ifdef __HIP_PLATFORM_HCC__
#ifdef __HIP_PLATFORM_AMD__
#define CVT_INTEGRAL2ACCUM(x) MIOPEN_ERROR_NOT_IMLEMENTED
#else
#if MIOPEN_USE_BFP16 == 1
Expand Down
2 changes: 1 addition & 1 deletion src/kernels/hip_f8_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
// #include <half.hpp>
namespace miopen_hip_f8_impl {

#ifndef __HIP_PLATFORM_HCC__
#ifndef __HIP_PLATFORM_AMD__
using hip_bfloat16 = bfloat16;
using half = half_float::half;
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/kernels/hip_float8.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#endif

// FP8 header version 0.4, 2021/05/11
#if defined __HIP_PLATFORM_HCC__ && MIOPEN_ENABLE_F8_DEVICE_CODE
#if defined __HIP_PLATFORM_AMD__ && MIOPEN_ENABLE_F8_DEVICE_CODE
// MIOpen by default does not have device code in the regular compilation paths,
// therefore, when this file is used from the host side, compilation takes much
// longer. By guarding the __device__ directive we can control that such compilation
Expand Down
Loading

0 comments on commit 7da72bc

Please sign in to comment.