Skip to content

Commit

Permalink
Merge branch 'develop' into wa-issue-1315-1317-312112-313696
Browse files Browse the repository at this point in the history
  • Loading branch information
atamazov committed Dec 4, 2021
2 parents ed89af5 + cd0d942 commit 62a3b2d
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 5 deletions.
6 changes: 4 additions & 2 deletions src/conv/invokers/impl_gemm_dynamic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,8 @@ InvokerFactory MakeImplGemmDynamicForwardXdlopsNHWCInvokerFactory(
trans_output_idx = idx++;
}

const size_t cast_offset = is_nchw ? (trans_output_offset + trans_output_size) : 0;
// 4-byte alignment required for atomic add
const size_t cast_offset = is_nchw ? (((trans_output_offset + trans_output_size + 3) >> 2) << 2) : 0;
const size_t cast_size = need_cast ? miopen::GetTypeSize(miopenFloat) * n * k * ho * wo : 0;

const int kID_trans_start = isGfx90aFp16altSupport ? 2 : 1;
Expand Down Expand Up @@ -886,7 +887,8 @@ InvokerFactory MakeImplGemmDynamicBackwardDataXdlopsNHWCInvokerFactory(
trans_output_idx = idx++;
}

const size_t cast_offset = is_nchw ? (trans_output_offset + trans_output_size) : 0;
// 4-byte alignment required for atomic add
const size_t cast_offset = is_nchw ? (((trans_output_offset + trans_output_size + 3) >> 2) << 2) : 0;
const size_t cast_size = need_cast ? miopen::GetTypeSize(miopenFloat) * n * c * hi * wi : 0;

const int kID_trans_start = isGfx90aFp16altSupport ? 2 : 1;
Expand Down
6 changes: 4 additions & 2 deletions src/solver/batchnorm/backward_spatial_single.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,11 @@ BnBwdTrainingSpatialSingle::GetSolution(const ExecutionContext& context,
(StartsWith(handle.GetDeviceName(), "gfx8") ||
(StartsWith(handle.GetDeviceName(), "gfx9")
#if WORKAROUND_ISSUE_1146
&& (handle.GetDeviceName() != "gfx90a")
&&
(handle.GetDeviceName() != "gfx90a")
#endif
)) &&
&&
(handle.GetDeviceName() != "gfx940"))) &&
(!handle.GetTargetProperties().Xnack() || !*handle.GetTargetProperties().Xnack()))
{
kernel.kernel_file = "gcnAsmBNBwdTrainSpatial.s";
Expand Down
3 changes: 3 additions & 0 deletions src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,9 @@ ConvAsmImplicitGemmGTCDynamicBwdXdlopsNHWC::GetWorkspaceSize(const ConvolutionCo
workspace_size += trans_weight.GetSize();
if(!trans_output.IsSkippable())
workspace_size += trans_output.GetSize();

// 4-byte alignment required for atomic add
workspace_size = ((workspace_size + 3) >> 2) << 2;
}

if(!ctx.IsFp32())
Expand Down
3 changes: 3 additions & 0 deletions src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,9 @@ ConvAsmImplicitGemmGTCDynamicFwdXdlopsNHWC::GetWorkspaceSize(const ConvolutionCo
workspace_size += trans_weight.GetSize();
if(!trans_output.IsSkippable())
workspace_size += trans_output.GetSize();

// 4-byte alignment required for atomic add
workspace_size = ((workspace_size + 3) >> 2) << 2;
}

if(!ctx.IsFp32())
Expand Down
6 changes: 5 additions & 1 deletion src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,9 @@ ConvAsmImplicitGemmGTCDynamicWrwXdlopsNHWC::GetWorkspaceSize(const ConvolutionCo
workspace_size += trans_weight.GetSize();
if(!trans_output.IsSkippable())
workspace_size += trans_output.GetSize();

// 4-byte alignment required for atomic add
workspace_size = ((workspace_size + 3) >> 2) << 2;
}

if(!ctx.IsFp32())
Expand Down Expand Up @@ -1076,7 +1079,8 @@ ConvSolution ConvAsmImplicitGemmGTCDynamicWrwXdlopsNHWC::GetSolution(

MIOPEN_LOG_I2(SolverDbId(*this) << ": " << config.ToString() << msg.str());

const size_t cast_offset = is_nchw ? (trans_output_offset + trans_output_size) : 0;
// 4-byte alignment required for atomic add
const size_t cast_offset = is_nchw ? (((trans_output_offset + trans_output_size + 3) >> 2) << 2) : 0;
const size_t cast_size = need_cast ?
miopen::GetTypeSize(miopenFloat) * k * (c / group) * y * x : 0;

Expand Down
3 changes: 3 additions & 0 deletions src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ bool ConvCkIgemmFwdV6r1DlopsNchw::IsApplicable(const ConvolutionContext& ctx) co
return false;
if(ctx.group_counts != 1)
return false;
if(ctx.GetStream().GetTargetProperties().Name() == "gfx90a" &&
ctx.conv_problem.IsGfx90aFp16altRequired())
return false;

{
// this kernel uses int32_t for memory offsets, which covers 2 GB of memory maximum
Expand Down

0 comments on commit 62a3b2d

Please sign in to comment.