Skip to content

Commit

Permalink
Deprecate FAST_HYBRID Find mode (#1358)
Browse files Browse the repository at this point in the history
  • Loading branch information
atamazov authored Dec 23, 2021
1 parent adcef3b commit 7f590d2
Show file tree
Hide file tree
Showing 19 changed files with 17 additions and 91 deletions.
6 changes: 3 additions & 3 deletions doc/src/find_and_immediate.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,10 @@ MIOpen provides a set of Find modes which are used to accelerate the Find calls.
- `NORMAL`, or `1`: Normal Find: This is the full Find mode call, which will benchmark all the solvers and return a list.
- `FAST`, or `2`: Fast Find: Checks the [Find-Db](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/finddb.html) for an entry. If there is a Find-Db hit, use that entry. If there is a miss, utilize the Immediate mode fallback. Start-up times are expected to be faster, but GPU performance is expected to be worse.
- `HYBRID`, or `3`, or unset `MIOPEN_FIND_MODE`: Hybrid Find: Checks the [Find-Db](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/finddb.html) for an entry. If there is a Find-Db hit, use that entry. If there is a miss, use the existing Find machinery. Slower start-up times than Fast Find, but no GPU performance drop.
- `FAST_HYBRID`, or `4`: Fast Hybrid Find: Checks the [Find-Db](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/finddb.html) for an entry. If there is a Find-Db hit, uses that entry. If there is a miss, uses the existing Find machinery with skipping slow-compiling kernels. Faster start-up times than Hybrid Find, but GPU performance is a bit worse.
- `DYNAMIC_HYBRID`, or `5`: Dynamic Hybrid Find: This mode is similar to Fast Hybrid, but in case of Find-db miss, skips all non-dynamic kernels, thus saving compilation time. Versus FAST_HYBRID, we expect similar start-up times but better GPU performance. Use with caution, this mode is experimental for now.
- `4`: This value is reserved and should not be used.
- `DYNAMIC_HYBRID`, or `5`: Dynamic Hybrid Find: Checks the [Find-Db](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/finddb.html) for an entry. If there is a Find-Db hit, uses that entry. If there is a miss, uses the existing Find machinery but skips non-dynamic kernels. Faster start-up times than Hybrid Find, but GPU performance may be a bit worse.

As of MIOpen 2.7, the default mode is set to `HYBRID` mode as default. To run the full `NORMAL` Find mode, set the environment as:
Currently, the default Find mode is `DYNAMIC_HYBRID`. To run the full `NORMAL` Find mode, set the environment as:
```
export MIOPEN_FIND_MODE=NORMAL
```
Expand Down
6 changes: 0 additions & 6 deletions src/convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,6 @@ std::size_t ConvolutionDescriptor::ForwardGetWorkSpaceSize(Handle& handle,
GetForwardSolutions(handle, wDesc, xDesc, yDesc, 1, &count, &sol, &fallback);
if(count < 1 || (findMode.IsHybrid(ctx) && fallback))
{
ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage =
findMode.IsFastHybrid(ctx);
ctx.use_dynamic_solutions_only = findMode.IsDynamicHybrid(ctx);
break; // Fall down to Normal Find.
}
Expand Down Expand Up @@ -438,8 +436,6 @@ ConvolutionDescriptor::BackwardDataGetWorkSpaceSize(Handle& handle,
GetBackwardSolutions(handle, dyDesc, wDesc, dxDesc, 1, &count, &sol, &fallback);
if(count < 1 || (findMode.IsHybrid(ctx) && fallback))
{
ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage =
findMode.IsFastHybrid(ctx);
ctx.use_dynamic_solutions_only = findMode.IsDynamicHybrid(ctx);
break; // Fall down to Normal Find.
}
Expand Down Expand Up @@ -730,8 +726,6 @@ ConvolutionDescriptor::BackwardWeightsGetWorkSpaceSize(Handle& handle,
GetWrwSolutions(handle, dyDesc, xDesc, dwDesc, 1, &count, &sol, &fallback);
if(count < 1 || (findMode.IsHybrid(ctx) && fallback))
{
ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage =
findMode.IsFastHybrid(ctx);
ctx.use_dynamic_solutions_only = findMode.IsDynamicHybrid(ctx);
break; // Fall down to Normal Find.
}
Expand Down
7 changes: 1 addition & 6 deletions src/find_controls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ const char* ToCString(const FindMode::Values mode)
case FindMode::Values::Normal: return "NORMAL";
case FindMode::Values::Fast: return "FAST";
case FindMode::Values::Hybrid: return "HYBRID";
case FindMode::Values::FastHybrid: return "FAST_HYBRID";
case FindMode::Values::DeprecatedFastHybrid: break;
case FindMode::Values::DynamicHybrid: return "DYNAMIC_HYBRID";
case FindMode::Values::End_: break;
}
Expand All @@ -191,8 +191,6 @@ FindMode::Values GetFindModeValueImpl2()
return FindMode::Values::Fast;
else if(str == "HYBRID")
return FindMode::Values::Hybrid;
else if(str == "FAST_HYBRID")
return FindMode::Values::FastHybrid;
else if(str == "DYNAMIC_HYBRID")
return FindMode::Values::DynamicHybrid;
else
Expand Down Expand Up @@ -232,9 +230,6 @@ static_assert(miopenConvolutionFindModeFast ==
static_assert(miopenConvolutionFindModeHybrid ==
static_cast<miopenConvolutionFindMode_t>(FindMode::Values::Hybrid),
"API is not in sync with the implementation.");
static_assert(miopenConvolutionFindModeFastHybrid ==
static_cast<miopenConvolutionFindMode_t>(FindMode::Values::FastHybrid),
"API is not in sync with the implementation.");
static_assert(miopenConvolutionFindModeDynamicHybrid ==
static_cast<miopenConvolutionFindMode_t>(FindMode::Values::DynamicHybrid),
"API is not in sync with the implementation.");
Expand Down
16 changes: 4 additions & 12 deletions src/include/miopen/execution_context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,8 @@ struct ExecutionContext
// to optimize the getWorkspaceSize() calls for speed. This specific optimization is correct
// because Solvers shall be written so that the required workspace size does not depend on the
// performance config.
bool disable_perfdb_access = false;
bool skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage = false;
bool use_dynamic_solutions_only = false;
bool disable_perfdb_access = false;
bool use_dynamic_solutions_only = false;

inline Handle& GetStream() const { return *stream; }
inline void SetStream(Handle* stream_) { stream = stream_; }
Expand Down Expand Up @@ -275,25 +274,18 @@ struct ExecutionContext

class AutoUseFastDynamicSolutions
{
bool prev_skip_slow_;
bool prev_use_dynamic_;
ExecutionContext* const ctx;

public:
AutoUseFastDynamicSolutions(ExecutionContext& ctx_) : ctx(&ctx_)
{
prev_skip_slow_ = ctx->skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage;
prev_use_dynamic_ = ctx->use_dynamic_solutions_only;

ctx->skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage = true;
ctx->use_dynamic_solutions_only = true;
ctx->use_dynamic_solutions_only = true;
}

~AutoUseFastDynamicSolutions()
{
ctx->skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage = prev_skip_slow_;
ctx->use_dynamic_solutions_only = prev_use_dynamic_;
}
~AutoUseFastDynamicSolutions() { ctx->use_dynamic_solutions_only = prev_use_dynamic_; }
};

bool IsHipKernelsEnabled(const TargetProperties& target);
Expand Down
12 changes: 2 additions & 10 deletions src/include/miopen/find_controls.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ class FindMode
Normal = Begin_,
Fast,
Hybrid,
FastHybrid,
DeprecatedFastHybrid,
DynamicHybrid,
End_,
Default_ = DynamicHybrid,
Expand Down Expand Up @@ -148,15 +148,7 @@ class FindMode
template <class Context>
bool IsHybrid(const Context& context) const
{
return (value == Values::Hybrid || value == Values::FastHybrid ||
value == Values::DynamicHybrid) &&
IsEnabled(context);
}

template <class Context>
bool IsFastHybrid(const Context& context) const
{
return value == Values::FastHybrid && IsEnabled(context);
return (value == Values::Hybrid || value == Values::DynamicHybrid) && IsEnabled(context);
}

template <class Context>
Expand Down
13 changes: 5 additions & 8 deletions src/include/miopen/miopen_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,20 +60,17 @@ extern "C" {
* use the existing Find machinery. Slower start-up times than Fast Find, but no GPU performance
* drop.
*
* * Fast Hybrid: Checks the Find-db for an entry. If there is a hit, use that entry. If there is a
* miss, uses the existing Find machinery with skipping slow-compiling kernels. Faster start-up
* times than Hybrid Find, but GPU performance is a bit worse.
*
* * Dynamic Hybrid: This mode is similar to Fast Hybrid, but in case of Find-db miss skips all
* non-dynamic kernels, thus saving compilation time. Versus Fast Hybrid, we expect similar start-up
* times but better GPU performance.
* * Dynamic Hybrid: Checks the Find-db for an entry. If there is a hit, uses that entry. If there
* is a miss, uses the existing Find machinery but skips non-dynamic kernels, thus saving
* compilation time. Faster start-up times than Hybrid Find, but GPU performance may be a bit
* worse.
*/
typedef enum
{
miopenConvolutionFindModeNormal = 1, /*!< Normal mode */
miopenConvolutionFindModeFast = 2, /*!< Fast mode */
miopenConvolutionFindModeHybrid = 3, /*!< Hybrid mode */
miopenConvolutionFindModeFastHybrid = 4, /*!< Fast Hybrid mode */
miopenConvolutionFindModeReserved_4 = 4, /*!< Reserved - do not use */
miopenConvolutionFindModeDynamicHybrid = 5, /*!< Dynamic Hybrid mode */
miopenConvolutionFindModeDefault =
miopenConvolutionFindModeDynamicHybrid, /*!< Default setting */
Expand Down
15 changes: 2 additions & 13 deletions src/ocl/convolutionocl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,6 @@ ConvolutionDescriptor::FindDataDirectSolutions(Handle& handle,

const auto dir = isForward ? conv::Direction::Forward : conv::Direction::BackwardData;
auto ctx = ConvolutionContext{xDesc, wDesc, yDesc, *this, dir};
ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage =
findMode.IsFastHybrid(ctx);
ctx.use_dynamic_solutions_only = findMode.IsDynamicHybrid(ctx);
ctx.do_search = exhaustiveSearch;
ctx.save_srch_req = true;
Expand Down Expand Up @@ -220,8 +218,6 @@ ConvolutionDescriptor::FindDataImplicitGemmSolutions(Handle& handle,
const auto dir = isForward ? conv::Direction::Forward : conv::Direction::BackwardData;
auto ctx = ConvolutionContext{xDesc, wDesc, yDesc, *this, dir};

ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage =
findMode.IsFastHybrid(ctx);
ctx.use_dynamic_solutions_only = findMode.IsDynamicHybrid(ctx);
ctx.do_search = exhaustiveSearch;
ctx.save_srch_req = true;
Expand Down Expand Up @@ -494,8 +490,6 @@ void ConvolutionDescriptor::FindConvFwdAlgorithm(Handle& handle,
ConvolutionUserBuffers bufs(workSpace, workSpaceSize);
bufs.SetFwd(x, w, y);
ctx.SetBufs(bufs);
ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage =
findMode.IsFastHybrid(ctx);
ctx.use_dynamic_solutions_only = findMode.IsDynamicHybrid(ctx);
perf_db = UserFindDbRecord::TryLoad(handle, problem, [&](DbRecord& record) {
DirConvFindCore(handle,
Expand Down Expand Up @@ -1130,15 +1124,12 @@ void ConvolutionDescriptor::FindConvBwdDataAlgorithm(Handle& handle,
}();

perf_db = UserFindDbRecord::TryLoad(handle, problem, [&](DbRecord& record) {
const auto network_config = problem.BuildConfKey();
const auto invoke_ctx = conv::DataInvokeParams{InvokeType::Evaluate,
const auto network_config = problem.BuildConfKey();
const auto invoke_ctx = conv::DataInvokeParams{InvokeType::Evaluate,
{dyDesc, dy, wDesc, w, dxDesc, dx},
workSpace,
workSpaceSize,
this->attribute.gfx90aFp16alt.GetBwd()};

ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage =
findMode.IsFastHybrid(ctx);
ctx.use_dynamic_solutions_only = findMode.IsDynamicHybrid(ctx);

// Find solutions
Expand Down Expand Up @@ -1494,8 +1485,6 @@ void ConvolutionDescriptor::FindConvBwdWeightsAlgorithm(Handle& handle,
perf_db = UserFindDbRecord::TryLoad(handle, problem, [&](DbRecord& record) {
ConvolutionUserBuffers bufs(workSpace, workSpaceSize);
bufs.SetWrW(x, dw, dy);
ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage =
findMode.IsFastHybrid(ctx);
ctx.use_dynamic_solutions_only = findMode.IsDynamicHybrid(ctx);
ctx.do_search = exhaustiveSearch;
ctx.SetStream(&handle);
Expand Down
2 changes: 0 additions & 2 deletions src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -638,8 +638,6 @@ bool ConvHipImplicitGemmBwdDataV1R1::IsApplicable(const ConvolutionContext& ctx)
{
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{}))
return false;
if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage)
return false;
if(!ctx.use_hip_kernels)
return false;
if(!ctx.IsLayoutDefault())
Expand Down
3 changes: 0 additions & 3 deletions src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -748,9 +748,6 @@ bool ConvHipImplicitGemmBwdDataV1R1Xdlops::IsApplicable(const ConvolutionContext
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{}))
return false;

if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage)
return false;

if(!IsComposableKernelSupportedHardware(ctx))
return false;

Expand Down
3 changes: 0 additions & 3 deletions src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -745,9 +745,6 @@ bool ConvHipImplicitGemmBwdDataV4R1::IsApplicable(const ConvolutionContext& ctx)
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1{}))
return false;

if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage)
return false;

if(!IsComposableKernelSupportedHardware(ctx))
return false;

Expand Down
2 changes: 0 additions & 2 deletions src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -824,8 +824,6 @@ bool ConvHipImplicitGemmBwdDataV4R1Xdlops::IsApplicable(const ConvolutionContext
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{}))
return false;
#endif
if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage)
return false;
if(!IsComposableKernelSupportedHardware(ctx))
return false;
if(!ctx.direction.IsBackwardData())
Expand Down
4 changes: 0 additions & 4 deletions src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ bool ConvHipImplicitGemmV4R1Fwd::IsApplicable(const ConvolutionContext& ctx) con
{
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1{}))
return false;
if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage)
return false;
if(!IsComposableKernelSupportedHardware(ctx))
return false;
if(!ctx.direction.IsForward())
Expand Down Expand Up @@ -82,8 +80,6 @@ bool ConvHipImplicitGemmV4R1WrW::IsApplicable(const ConvolutionContext& ctx) con
{
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R1{}))
return false;
if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage)
return false;
if(!IsComposableKernelSupportedHardware(ctx))
return false;
if(!ctx.direction.IsBackwardWrW())
Expand Down
2 changes: 0 additions & 2 deletions src/solver/conv_hip_implicit_gemm_fwd_v4r4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -586,8 +586,6 @@ bool ConvHipImplicitGemmV4R4Fwd::IsApplicable(const ConvolutionContext& ctx) con
{
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4{}))
return false;
if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage)
return false;
if(!ctx.use_hip_kernels)
return false;
if(!ctx.IsLayoutDefault())
Expand Down
3 changes: 0 additions & 3 deletions src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -967,9 +967,6 @@ bool ConvHipImplicitGemmForwardV4R4Xdlops::IsApplicable(const ConvolutionContext
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_XDLOPS{}))
return false;

if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage)
return false;

if(!ctx.use_hip_kernels)
return false;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1043,9 +1043,6 @@ bool ConvHipImplicitGemmForwardV4R4Xdlops_Padded_Gemm::IsApplicable(
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_PADDED_GEMM_XDLOPS{}))
return false;

if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage)
return false;

if(!ctx.use_hip_kernels)
return false;

Expand Down
3 changes: 0 additions & 3 deletions src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -997,9 +997,6 @@ bool ConvHipImplicitGemmForwardV4R5Xdlops::IsApplicable(const ConvolutionContext
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R5_XDLOPS{}))
return false;

if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage)
return false;

if(!ctx.use_hip_kernels)
return false;

Expand Down
2 changes: 0 additions & 2 deletions src/solver/conv_hip_implicit_gemm_wrw_v4r4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,8 +590,6 @@ bool ConvHipImplicitGemmV4R4WrW::IsApplicable(const ConvolutionContext& ctx) con
{
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4{}))
return false;
if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage)
return false;
if(!ctx.use_hip_kernels)
return false;
if(!ctx.IsLayoutDefault())
Expand Down
3 changes: 0 additions & 3 deletions src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1036,9 +1036,6 @@ bool ConvHipImplicitGemmWrwV4R4Xdlops::IsApplicable(const ConvolutionContext& ct
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_XDLOPS{}))
return false;

if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage)
return false;

if(!ctx.use_hip_kernels)
return false;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1108,9 +1108,6 @@ bool ConvHipImplicitGemmWrwV4R4Xdlops_Padded_Gemm::IsApplicable(const Convolutio
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_PADDED_GEMM_XDLOPS{}))
return false;

if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage)
return false;

if(!IsComposableKernelSupportedHardware(ctx))
return false;

Expand Down

0 comments on commit 7f590d2

Please sign in to comment.