Merge branch 'develop' into cderb/gtest_package
junliume authored Dec 21, 2023
2 parents fed0b7c + 3c9d69a commit b2eeb23
Showing 30 changed files with 937 additions and 69 deletions.
15 changes: 14 additions & 1 deletion CMakeLists.txt
@@ -111,7 +111,7 @@ if(NOT WIN32 AND NOT APPLE)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
endif()

-rocm_setup_version(VERSION 3.00.0)
+rocm_setup_version(VERSION 3.1.0)

list( APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake )
include(TargetFlags)
@@ -625,6 +625,19 @@ enable_cppcheck(
knownConditionTrueFalse
shadowFunction
moduloofone
+###################################################################
+# TODO Code Quality WORKAROUND ROCm 6.0 &&
+# Ubuntu 22.04 && cppcheck 2.12.1 update
+###################################################################
+duplInheritedMember
+constParameterCallback
+constParameterReference
+constParameterPointer
+constVariableReference
+constVariablePointer
+useStlAlgorithm
+uselessOverride
+unusedScopedObject
FORCE
SOURCES
addkernels/
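
Note: the added suppressions correspond to checks that are new or stricter in cppcheck 2.12.1. As a minimal illustration (not code from this repository), constParameterReference fires on a pattern like the following, where a by-reference parameter is never modified:

// Illustrative only: cppcheck 2.12.1 reports constParameterReference here,
// suggesting the parameter be declared as `const std::vector<int>&`.
#include <vector>

int Sum(std::vector<int>& values)
{
    int total = 0;
    for(const int v : values)
        total += v;
    return total;
}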
24 changes: 15 additions & 9 deletions Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:20.04 as miopen
+FROM ubuntu:22.04 as miopen
ARG DEBIAN_FRONTEND=noninteractive

# Support multiarch
@@ -18,17 +18,17 @@ DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg

-RUN wget https://repo.radeon.com/amdgpu-install/5.7.1/ubuntu/focal/amdgpu-install_5.7.50701-1_all.deb --no-check-certificate
+RUN wget https://repo.radeon.com/amdgpu-install/6.0/ubuntu/jammy/amdgpu-install_6.0.60000-1_all.deb --no-check-certificate
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
-./amdgpu-install_5.7.50701-1_all.deb
+./amdgpu-install_6.0.60000-1_all.deb

# Add rocm repository
-RUN export ROCM_APT_VER=5.7.1;\
+RUN export ROCM_APT_VER=6.0;\
echo $ROCM_APT_VER &&\
-sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list' &&\
-sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER focal main > /etc/apt/sources.list.d/rocm.list'
-RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
+sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCM_APT_VER/ubuntu jammy main > /etc/apt/sources.list.d/amdgpu.list' &&\
+sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/rocm/apt/$ROCM_APT_VER jammy main > /etc/apt/sources.list.d/rocm.list'
+RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu jammy main universe | tee -a /etc/apt/sources.list"

RUN amdgpu-install -y --usecase=rocm --no-dkms

@@ -96,11 +96,17 @@ RUN tar zxvf /tmp/ccache.tar.gz -C /tmp/ && mkdir /tmp/ccache-${CCACHE_COMMIT}/b
cd /tmp/ccache-${CCACHE_COMMIT}/build && \
cmake -DZSTD_FROM_INTERNET=ON -DHIREDIS_FROM_INTERNET=ON .. && make -j install && rm -rf /tmp/*
RUN ccache -s

+# purge the composable kernel package installed with ROCm
+# (cannot use autoremove here, since it would remove additional components)
+RUN apt-get update && \
+DEBIAN_FRONTEND=noninteractive apt-get purge -y --allow-unauthenticated \
+composablekernel-dev
ARG COMPILER_LAUNCHER=""
RUN if [ "$USE_FIN" = "ON" ]; then \
-rbuild prepare -s fin -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
+rbuild prepare -s fin -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
else \
-rbuild prepare -s develop -d $PREFIX -DAMDGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
+rbuild prepare -s develop -d $PREFIX -DGPU_TARGETS=${GPU_ARCH} -DCMAKE_CXX_COMPILER_LAUNCHER="${COMPILER_LAUNCHER}"; \
fi

RUN ccache -s
2 changes: 1 addition & 1 deletion dev-requirements.txt
@@ -1,4 +1,4 @@
ROCmSoftwarePlatform/rocm-recipes@d7b71f8ff71572833c8cf15b74279dd034e66f9d
-f requirements.txt
-danmar/cppcheck@2.9
+danmar/cppcheck@2.12.1
google/googletest@v1.14.0
2 changes: 1 addition & 1 deletion docs/DebugAndLogging.md
@@ -94,7 +94,7 @@ Direct Solutions:
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11` - `ConvOclDirectFwd11x11`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN` - `ConvOclDirectFwdGen`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD` - `ConvOclDirectFwd`.
-* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd`.
+* `MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1` - `ConvOclDirectFwd1x1`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2` - `ConvOclBwdWrW2<n>` (where n = `{1,2,4,8,16}`), and `ConvOclBwdWrW2NonTunable`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53` - `ConvOclBwdWrW53`.
* `MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1` - `ConvOclBwdWrW1x1`
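
Note: these switches are read from the process environment at run time, and setting one to 0 excludes the corresponding solution(s) from the search. A hypothetical sketch of the gating pattern (MIOpen uses its own env-var helpers; only the variable name below is real):

// Hypothetical sketch of an env-var kill switch for a solver.
#include <cstdlib>
#include <cstring>

static bool DisabledByEnv(const char* name)
{
    const char* value = std::getenv(name);
    return value != nullptr && std::strcmp(value, "0") == 0;
}

bool SolverIsApplicable()
{
    if(DisabledByEnv("MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1"))
        return false; // solver excluded from the search
    return true;      // ...plus the solver's real applicability checks
}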
2 changes: 1 addition & 1 deletion docs/sphinx/requirements.in
@@ -1 +1 @@
-rocm-docs-core==0.30.2
+rocm-docs-core==0.30.3
2 changes: 1 addition & 1 deletion docs/sphinx/requirements.txt
@@ -100,7 +100,7 @@ requests==2.31.0
# via
# pygithub
# sphinx
-rocm-docs-core==0.30.2
+rocm-docs-core==0.30.3
# via -r requirements.in
smmap==5.0.0
# via gitdb
6 changes: 3 additions & 3 deletions requirements.txt
@@ -1,10 +1,10 @@
sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On
boost@1.83 -DCMAKE_POSITION_INDEPENDENT_CODE=On --build -DCMAKE_CXX_FLAGS=" -std=c++14 -Wno-enum-constexpr-conversion -Wno-deprecated-builtins -Wno-deprecated-declarations "
facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cmake
-ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build
-ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH=/opt/conda/envs/py_3.9 -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda
+# ROCmSoftwarePlatform/half@10abd99e7815f0ca5d892f58dd7d15a23b7cf92c --build
+ROCmSoftwarePlatform/rocMLIR@rocm-5.5.0 -H sha256:a5f62769d28a73e60bc8d61022820f050e97c977c8f6f6275488db31512e1f42 -DBUILD_FAT_LIBROCKCOMPILER=1 -DCMAKE_IGNORE_PATH="/opt/conda/envs/py_3.8;/opt/conda/envs/py_3.9;/opt/conda/envs/py_3.10" -DCMAKE_IGNORE_PREFIX_PATH=/opt/conda
nlohmann/json@v3.11.2 -DJSON_MultipleHeaders=ON -DJSON_BuildTests=Off
ROCmSoftwarePlatform/FunctionalPlus@v0.2.18-p0
ROCmSoftwarePlatform/eigen@3.4.0
ROCmSoftwarePlatform/frugally-deep@9683d557eb672ee2304f80f6682c51242d748a50
-ROCmSoftwarePlatform/composable_kernel@0dacd895d5ba9c9eeb99588ec7f7df1da82f7fa9 -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON
+ROCmSoftwarePlatform/composable_kernel@d0f355a31a341b0a885ff65231781f332a20cc5f -DCMAKE_BUILD_TYPE=Release -DINSTANCES_ONLY=ON
3 changes: 2 additions & 1 deletion src/CMakeLists.txt
@@ -188,6 +188,7 @@ set( MIOpen_Source
solver/conv_bin_winoRxS_fused.cpp
solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp
solver/conv_ck_igemm_fwd_bias_activ_fused.cpp
+solver/conv_ck_igemm_fwd_bias_res_add_activ_fused.cpp
solver/conv_direct_naive_conv.cpp
solver/conv_direct_naive_conv_bwd.cpp
solver/conv_direct_naive_conv_fwd.cpp
@@ -769,7 +770,7 @@ elseif(MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN_BACKEND STREQUAL "HIP")
endif()
if(ENABLE_HIP_WORKAROUNDS)
# Workaround hip not setting its usage requirements correctly
-target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_HCC__=1 )
+target_compile_definitions( MIOpen PRIVATE -D__HIP_PLATFORM_AMD__=1 )
endif()
# This is helpful for the tests
target_link_libraries( MIOpen INTERFACE $<BUILD_INTERFACE:hip::device> )
6 changes: 4 additions & 2 deletions src/comgr.cpp
@@ -1293,8 +1293,10 @@ void BuildHip(const std::string& name,
miopen::SplitSpaceSeparated(options, miopen::comgr::compiler::lc::GetOptionsNoSplit());
compiler::lc::RemoveOptionsUnwanted(opts);
opts.push_back("-DWORKAROUND_ISSUE_HIPRTC_TRUE_TYPE"); // Workaround for SWDEV-308073
opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround?
opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround?
#if HIP_PACKAGE_VERSION_FLAT < 6000023494ULL
opts.push_back("-D__HIP_PLATFORM_HCC__=1"); // Workaround?
#endif
opts.push_back("-D__HIP_PLATFORM_AMD__=1"); // Workaround?
#if ROCM_FEATURE_LLVM_AMDGCN_BUFFER_ATOMIC_FADD_F32_RETURNS_FLOAT
if(miopen::solver::support_amd_buffer_atomic_fadd(target.Name()))
opts.push_back("-DCK_AMD_BUFFER_ATOMIC_FADD_RETURNS_FLOAT=1");
2 changes: 1 addition & 1 deletion src/composable_kernel/.clang-tidy
@@ -1,3 +1,3 @@
CheckOptions:
- key: bugprone-reserved-identifier.AllowedIdentifiers
-    value: '__HIP_PLATFORM_HCC__;__HIP_ROCclr__'
+    value: '__HIP_PLATFORM_AMD__;__HIP_ROCclr__'
2 changes: 1 addition & 1 deletion src/composable_kernel/cmake/ClangTidy.cmake
@@ -149,7 +149,7 @@ function(clang_tidy_check TARGET)
add_custom_target(${tidy_target}
# for some targets clang-tidy not able to get information from .clang-tidy
DEPENDS ${SOURCE}
-COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_HCC__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml"
+COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_AMD__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMENT "clang-tidy: Running clang-tidy on target ${SOURCE}..."
)
10 changes: 5 additions & 5 deletions src/composable_kernel/external/rocm/include/bfloat16_dev.hpp
@@ -30,7 +30,7 @@
extern "C" {
#endif

-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
#define EXECUTION_SPECIFIER __device__
#else
#define EXECUTION_SPECIFIER
@@ -43,7 +43,7 @@ typedef union

// Composable kernels are written in HIP language. The language doesnt support
// ushort2.hi or ushort2.low.
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
ushort ushortvec[2];
#endif // MIOPEN_BACKEND_HIP
float f32;
@@ -53,7 +53,7 @@ EXECUTION_SPECIFIER float bfloat16_to_float(ushort src_val)
{
cvt_bf16_fp32_t target_val;

-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
target_val.ushortx2 = make_ushort2(0, src_val);
#else
target_val.ushortx2 = (ushort2)(0, src_val);
@@ -102,7 +102,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
// When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
// incrementing it causes it to become an exponent of 0xFF and a mantissa
// of 0x00, which is Inf, the next higher value to the unrounded value.
-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
target_val.u32 += (0x7fff + (target_val.ushortvec[1] & 1));
#else
target_val.u32 +=
@@ -111,7 +111,7 @@ EXECUTION_SPECIFIER ushort float_to_bfloat16(float src_val)
#endif // MIOPEN_USE_RNE_BFLOAT16
}

-#ifdef __HIP_PLATFORM_HCC__
+#ifdef __HIP_PLATFORM_AMD__
return target_val.ushortvec[1];
#else
return target_val.ushortx2.hi;
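
For reference, the RNE branch above rounds by adding 0x7fff plus the lowest surviving mantissa bit. A self-contained host-side sketch of the same conversion (illustrative; the device code above is authoritative, and its NaN handling is not shown in this hunk):

// Host-side float -> bfloat16 with round-to-nearest-even, mirroring the
// u32 += (0x7fff + lsb) trick in the device code above.
#include <cstdint>
#include <cstring>

std::uint16_t FloatToBfloat16Rne(float src)
{
    std::uint32_t u;
    std::memcpy(&u, &src, sizeof(u)); // safe type-punning on the host

    // Keep NaN as NaN: make sure a mantissa bit survives truncation.
    if((u & 0x7f800000u) == 0x7f800000u && (u & 0x007fffffu) != 0u)
        return static_cast<std::uint16_t>((u >> 16) | 1u);

    u += 0x7fffu + ((u >> 16) & 1u); // round to nearest, ties to even
    return static_cast<std::uint16_t>(u >> 16);
}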
2 changes: 1 addition & 1 deletion src/convolution.cpp
@@ -289,7 +289,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor&
}
}

-std::size_t out_c;
+std::size_t out_c = 0;
std::vector<std::size_t> out_lens(spatial_dim + 2);

auto out_spatial = boost::adaptors::slice(out_lens, 2, 2 + spatial_dim);
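
The new initializer silences a maybe-uninitialized diagnostic from the upgraded tooling: presumably every later branch assigns out_c before use, but the analyzer cannot prove it. A minimal reproduction of the pattern:

// Minimal reproduction: an analyzer may flag `c` as possibly uninitialized
// even though every runtime path assigns it before use.
#include <cstddef>

std::size_t OutChannels(bool grouped, std::size_t k, std::size_t groups)
{
    std::size_t c = 0; // defensive initialization, as in the diff above
    if(grouped)
        c = k * groups;
    else
        c = k;
    return c;
}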
87 changes: 75 additions & 12 deletions src/fusion.cpp
@@ -80,6 +80,9 @@ miopenStatus_t ConvBiasActivFusion(Handle& handle,
assert(workspaceSizeInBytes == 0);
std::ignore = workspace;
std::ignore = workspaceSizeInBytes;
+/// \todo: add workspace support in fusion
+
+/*
if(alpha1 != nullptr)
{
const auto falpha1 = *(static_cast<const float*>(alpha1));
@@ -92,29 +95,46 @@ miopenStatus_t ConvBiasActivFusion(Handle& handle,
if(falpha2 != 1.0f)
MIOPEN_THROW(miopenStatusNotImplemented, "alpha2 can only be 1.0");
}
-if(z != nullptr || zDesc.GetSize() != 0)
-    MIOPEN_THROW(miopenStatusNotImplemented, "The addition of z vector is not yet supported");
+*/

+// TODO: The type of these pointers depends on the ConvolutionDescriptor's data
+// type
+float falpha1 = alpha1 != nullptr ? *(static_cast<const float*>(alpha1)) : 1.0f;
+float falpha2 = alpha2 != nullptr ? *(static_cast<const float*>(alpha2)) : 1.0f;

+// if(z != nullptr || zDesc.GetSize() != 0)
+//     MIOPEN_THROW(miopenStatusNotImplemented, "The addition of z vector is not yet supported");
FusionPlanDescriptor fusePlanDesc{miopenVerticalFusion, xDesc};
OperatorArgs fusionArgs;
-auto convoOp = std::make_shared<ConvForwardOpDescriptor>(conv_desc, wDesc);
+auto convOp = std::make_shared<ConvForwardOpDescriptor>(conv_desc, wDesc);
+auto zOp = std::make_shared<TensorScaleAddOpDescriptor>(zDesc);
auto biasOp = std::make_shared<BiasFusionOpDescriptor>(biasDesc);
auto activOp = std::make_shared<ActivFwdFusionOpDescriptor>(activationDesc.GetMode());
-MIOPEN_CHECK(fusePlanDesc.AddOp(convoOp));

+if(activationDesc.GetMode() != miopenActivationRELU)
+{
+    MIOPEN_THROW(miopenStatusNotImplemented,
+                 "only Activation Mode == miopenActivationRELU is supported");
+}
+
+MIOPEN_CHECK(fusePlanDesc.AddOp(convOp));
MIOPEN_CHECK(fusePlanDesc.SetConvAlgo(algo));
+MIOPEN_CHECK(fusePlanDesc.AddOp(zOp));
MIOPEN_CHECK(fusePlanDesc.AddOp(biasOp));
MIOPEN_CHECK(fusePlanDesc.AddOp(activOp));

MIOPEN_CHECK(fusePlanDesc.Compile(handle));
-float alpha = static_cast<float>(1.0);
-float beta = static_cast<float>(0);
+float alpha = 1.0f;
+float beta = 0.0f;
float activ_alpha = activationDesc.GetAlpha();
float activ_beta = activationDesc.GetBeta();
float activ_gamma = activationDesc.GetGamma();

// Set the Args
-MIOPEN_CHECK(convoOp->SetArgs(fusionArgs, &alpha, &beta, w));
-MIOPEN_CHECK(activOp->SetArgs(fusionArgs, &alpha, &beta, activ_alpha, activ_beta, activ_gamma));
+MIOPEN_CHECK(convOp->SetArgs(fusionArgs, &falpha1, &beta, w));
+MIOPEN_CHECK(zOp->SetArgs(fusionArgs, falpha2, z));
MIOPEN_CHECK(biasOp->SetArgs(fusionArgs, &alpha, &beta, bias));
+MIOPEN_CHECK(activOp->SetArgs(fusionArgs, &alpha, &beta, activ_alpha, activ_beta, activ_gamma));
MIOPEN_CHECK(fusePlanDesc.Execute(handle, xDesc, x, yDesc, y, fusionArgs));
return miopenStatusSuccess;
}
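
With this rewrite, ConvBiasActivFusion builds a conv -> z scale-add -> bias -> ReLU plan instead of rejecting a non-null z. A hedged usage sketch from the caller's side, assuming the cuDNN-style public entry point that forwards into this function (signature not re-verified against miopen.h):

// Hedged sketch: fused y = ReLU(alpha1 * conv(x, w) + alpha2 * z + bias).
#include <miopen/miopen.h>

miopenStatus_t RunFusedConvBiasReLU(miopenHandle_t handle,
                                    miopenTensorDescriptor_t xDesc, const void* x,
                                    miopenTensorDescriptor_t wDesc, const void* w,
                                    miopenConvolutionDescriptor_t convDesc,
                                    void* workspace, size_t workspaceSize,
                                    miopenTensorDescriptor_t zDesc, const void* z,
                                    miopenTensorDescriptor_t biasDesc, const void* bias,
                                    miopenActivationDescriptor_t activDesc, // must be ReLU, per the check above
                                    miopenTensorDescriptor_t yDesc, void* y)
{
    float alpha1 = 1.0f; // scales the convolution output (falpha1 above)
    float alpha2 = 1.0f; // scales z before the add (falpha2 above)
    return miopenConvolutionBiasActivationForward(
        handle, &alpha1, xDesc, x, wDesc, w, convDesc,
        miopenConvolutionFwdAlgoDirect, workspace, workspaceSize,
        &alpha2, zDesc, z, biasDesc, bias, activDesc, yDesc, y);
}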
@@ -140,6 +160,8 @@ AllocateBuffersAndMakeFusionInvokeParams(Handle& handle,
const auto bn_inf_id = solver::fusion::GetOpIdx(plan.op_map, miopenFusionOpBatchNormInference);
const auto bn_fwd_id = solver::fusion::GetOpIdx(plan.op_map, miopenFusionOpBatchNormFwdTrain);
const auto bn_bwd_id = solver::fusion::GetOpIdx(plan.op_map, miopenFusionOpBatchNormBwdTrain);
+const auto tensor_add_op_id =
+    solver::fusion::GetOpIdx(plan.op_map, miopenFusionOpTensorScaleAdd);

const auto any_activ = activ_fwd_id != -1 || activ_bwd_id != -1;
const auto any_bn = bn_inf_id != -1 || bn_fwd_id != -1 || bn_bwd_id != -1;
@@ -198,6 +220,20 @@
}
}

+if(tensor_add_op_id != -1)
+{
+    const auto& tensor_add_op =
+        dynamic_cast<const TensorScaleAddOpDescriptor&>(*plan.op_map[tensor_add_op_id]);
+    assert(&tensor_add_op);
+
+    float alpha = 1.0f;
+    const auto space = tensor_add_op.tensor_desc.GetNumBytes();
+    auto ptr = allocate_buffer(space);
+
+    params.SetArg(tensor_add_op_id,
+                  std::make_unique<miopen::fusion::TensorScaleAddOpInvokeParam>(alpha, ptr));
+}

if(any_bn)
{
const auto epsilon = 0.00001;
@@ -512,12 +548,24 @@ miopenStatus_t ConvForwardOpDescriptor::GetOutputDesc(TensorDescriptor& output_d
[&]() { output_desc = base_desc.GetForwardOutputTensor(input_desc, filter_desc); });
}

+/*
+miopenStatus_t
+ConvForwardOpDescriptor::SetArgs(OperatorArgs& args, float alpha, float beta, ConstData_t w)
+{
+    auto op_args = std::make_unique<fusion::ConvolutionOpInvokeParam>(alpha, beta, w);
+    args.SetArg(GetIdx(), std::move(op_args));
+    return miopenStatusSuccess;
+}
+*/

miopenStatus_t ConvForwardOpDescriptor::SetArgs(OperatorArgs& args,
-                                                const void* /*alpha*/,
-                                                const void* /*beta*/,
+                                                const void* alpha,
+                                                const void* beta,
                                                 ConstData_t w)
{
-    auto op_args = std::make_unique<fusion::ConvolutionOpInvokeParam>(w);
+    float falpha = alpha != nullptr ? *reinterpret_cast<const float*>(alpha) : 1.0f;
+    float fbeta = beta != nullptr ? *reinterpret_cast<const float*>(beta) : 0.0f;
+    auto op_args = std::make_unique<fusion::ConvolutionOpInvokeParam>(falpha, fbeta, w);
args.SetArg(GetIdx(), std::move(op_args));
return miopenStatusSuccess;
}
@@ -672,6 +720,20 @@ miopenStatus_t BiasFusionOpDescriptor::SetArgs(OperatorArgs& args,
return miopenStatusSuccess;
}

+miopenStatus_t TensorScaleAddOpDescriptor::GetOutputDesc(TensorDescriptor& output_desc) const
+{
+    output_desc = this->tensor_desc;
+    return miopenStatusSuccess;
+}
+
+miopenStatus_t
+TensorScaleAddOpDescriptor::SetArgs(OperatorArgs& args, float alpha, ConstData_t tensor_ptr)
+{
+    auto op_args = std::make_unique<fusion::TensorScaleAddOpInvokeParam>(alpha, tensor_ptr);
+    args.SetArg(GetIdx(), std::move(op_args));
+    return miopenStatusSuccess;
+}

std::string FusionPlanDescriptor::GetAlgorithmName(const Handle& /*handle*/)
{
if(conv_fwd_algo)
@@ -698,7 +760,8 @@ static auto GetFusedDirectSolvers()

static auto GetFusedIGemmSolvers()
{
-return solver::SolverContainer<solver::fusion::ConvCKIgemmFwdBiasActivFused>{};
+return solver::SolverContainer<solver::fusion::ConvCKIgemmFwdBiasActivFused,
+                               solver::fusion::ConvCKIgemmFwdBiasResAddActivFused>{};
}

static auto GetFusedWinogradSolvers()
11 changes: 11 additions & 0 deletions src/include/miopen/fusion.hpp
@@ -81,6 +81,16 @@ struct BiasFusionOpDescriptor : FusionOpDescriptor
TensorDescriptor base_desc;
};

+struct TensorScaleAddOpDescriptor : public FusionOpDescriptor
+{
+    TensorScaleAddOpDescriptor(const TensorDescriptor& desc) : tensor_desc(desc) {}
+    miopenStatus_t GetOutputDesc(TensorDescriptor& output_desc) const override;
+    miopenStatus_t GetNetworkConfig(std::ostringstream& network_config) override;
+    miopenStatus_t SetArgs(OperatorArgs& args, float alpha, ConstData_t tensor_ptr);
+    miopenFusionOp_t kind() const override { return miopenFusionOpTensorScaleAdd; };
+    TensorDescriptor tensor_desc;
+};

struct ActivFwdFusionOpDescriptor : FusionOpDescriptor
{
ActivFwdFusionOpDescriptor(miopenActivationMode_t mode) : activMode(mode) {}
@@ -215,6 +225,7 @@ struct ConvForwardOpDescriptor : FusionOpDescriptor
conv_compiler_options(""){};
miopenStatus_t GetOutputDesc(TensorDescriptor& output_desc) const override;
miopenStatus_t SetArgs(OperatorArgs& args, const void* alpha, const void* beta, ConstData_t w);
+    // miopenStatus_t SetArgs(OperatorArgs& args, float alpha, float beta, ConstData_t w);
miopenStatus_t GetNetworkConfig(std::ostringstream& network_config) override;
bool isASMApplicable(Handle& handle);
miopenFusionOp_t kind() const override { return miopenFusionOpConvForward; };
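
The new TensorScaleAddOpDescriptor above follows the same contract as the other fusion ops: report an output descriptor, contribute to the network config key, and accept typed runtime args. A reduced sketch of that contract with simplified stand-in types (not MIOpen's real classes):

// Reduced sketch of the fusion-op contract; all types are stand-ins.
#include <cstddef>
#include <sstream>
#include <utility>
#include <vector>

struct OpDescriptor
{
    virtual ~OpDescriptor() = default;
    // Each op appends a fragment to the plan's config/cache key.
    virtual void GetNetworkConfig(std::ostringstream& cfg) const = 0;
};

struct ScaleAddOp final : OpDescriptor
{
    explicit ScaleAddOp(std::vector<std::size_t> shape) : shape_(std::move(shape)) {}
    // Elementwise y += alpha * t preserves the shape, so the "output
    // descriptor" is just the stored tensor shape.
    const std::vector<std::size_t>& GetOutputShape() const { return shape_; }
    void GetNetworkConfig(std::ostringstream& cfg) const override { cfg << "scaleadd,"; }

private:
    std::vector<std::size_t> shape_;
};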
(Diffs for the remaining 15 of the 30 changed files were not loaded in this view.)