Skip to content

Commit

Permalink
Fix transposed convolutions (#2487)
Browse files Browse the repository at this point in the history
  • Loading branch information
atamazov authored and junliume committed Oct 27, 2023
1 parent 72ce416 commit 82f685c
Show file tree
Hide file tree
Showing 6 changed files with 176 additions and 147 deletions.
70 changes: 30 additions & 40 deletions driver/conv_driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ class ConvDriver : public Driver

miopenConvolutionDescriptor_t convDesc;
miopenConvolutionDescriptor_t warmupConvDesc;
miopenConvolutionMode_t mode;

bool is_wrw = true, is_bwd = true, is_fwd = true;
bool is_wrw_winograd = false;
Expand Down Expand Up @@ -536,6 +537,19 @@ int ConvDriver<Tgpu, Tref>::ParseCmdLineArgs(int argc, char* argv[])
inflags.GetValueStr("out_layout") + "c" + std::to_string(vector_length));
}

if((inflags.GetValueStr("mode")) == "conv")
{
mode = miopenConvolution;
}
else if((inflags.GetValueStr("mode")) == "trans")
{
mode = miopenTranspose;
}
else
{
MIOPEN_THROW("Incorrect Convolution Mode\n");
}

num_iterations = inflags.GetValueInt("iter");
if(num_iterations < 1)
{
Expand Down Expand Up @@ -755,13 +769,12 @@ int ConvDriver<Tgpu, Tref>::GetandSetData()
std::vector<int> pads = {0, 0};
std::vector<int> conv_strides = {1, 1};
std::vector<int> conv_dilations = {1, 1};
miopenConvolutionMode_t mode = miopenConvolution;
miopenInitConvolutionNdDescriptor(warmupConvDesc,
spatial_dim,
pads.data(),
conv_strides.data(),
conv_dilations.data(),
mode);
miopenConvolution);
miopenSetConvolutionFindMode(warmupConvDesc, miopenConvolutionFindModeNormal);
miopenHiddenSetConvolutionFindMode(
warmupConvDesc,
Expand Down Expand Up @@ -980,20 +993,6 @@ std::vector<int> ConvDriver<Tgpu, Tref>::GetWeightTensorLengthsFromCmdLine()
}
}

miopenConvolutionMode_t mode;
if((inflags.GetValueStr("mode")) == "conv")
{
mode = miopenConvolution;
}
else if((inflags.GetValueStr("mode")) == "trans")
{
mode = miopenTranspose;
}
else
{
MIOPEN_THROW("Incorrect Convolution Mode\n");
}

if(mode == miopenTranspose)
{
wei_lens[0] = wei_c_len;
Expand Down Expand Up @@ -1087,21 +1086,6 @@ int ConvDriver<Tgpu, Tref>::SetConvDescriptorFromCmdLineArgs()
}
}

miopenConvolutionMode_t mode;
if((inflags.GetValueStr("mode")) == "conv")
{
mode = miopenConvolution;
}
else if((inflags.GetValueStr("mode")) == "trans")
{
mode = miopenTranspose;
}
else
{
printf("Incorrect Convolution Mode\n");
exit(0); // NOLINT (concurrency-mt-unsafe)
}

// adjust padding based on user-defined padding mode
if(mode == miopenConvolution &&
(miopen::all_of(conv_dilations, [](auto v) { return v == 1; }) ||
Expand Down Expand Up @@ -1252,6 +1236,8 @@ int ConvDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
size_t wei_sz = GetTensorSize(weightTensor);
size_t out_sz = GetTensorSize(outputTensor);
auto subnorm_percentage = miopen::Value(MIOPEN_DRIVER_SUBNORM_PERCENTAGE{});
if(subnorm_percentage != 0)
std::cout << "MIOPEN_DRIVER_SUBNORM_PERCENTAGE = " << subnorm_percentage << std::endl;

// Workaround: Pad buffers allocations to be a multiple of 2M
if(miopen::IsEnabled(MIOPEN_DRIVER_PAD_BUFFERS_2M{}))
Expand Down Expand Up @@ -2252,7 +2238,7 @@ int ConvDriver<Tgpu, Tref>::RunForwardGpuImmed(const bool is_transform)
template <typename Tgpu, typename Tref>
int ConvDriver<Tgpu, Tref>::RunForwardCPU()
{
if(miopen::deref(convDesc).mode == miopenTranspose)
if(mode == miopenTranspose)
{
cpu_convolution_backward_data(miopen::deref(convDesc).GetSpatialDimension(),
outhost,
Expand Down Expand Up @@ -2308,7 +2294,7 @@ int ConvDriver<Tgpu, Tref>::RunForwardGPUReference()
std::cout << "gpu reference convolution does not support bias yet" << std::endl;
return -1;
}
auto ref_solution_id = miopen::deref(convDesc).mode == miopenTranspose
auto ref_solution_id = mode == miopenTranspose //
? miopen::solver::Id("ConvDirectNaiveConvBwd").Value()
: miopen::solver::Id("ConvDirectNaiveConvFwd").Value();
auto rc = miopenConvolutionForwardImmediate(handle,
Expand Down Expand Up @@ -3142,7 +3128,7 @@ int ConvDriver<Tgpu, Tref>::RunBackwardWrwGpuImmed()
template <typename Tgpu, typename Tref>
int ConvDriver<Tgpu, Tref>::RunBackwardWeightsCPU()
{
if(miopen::deref(convDesc).mode == miopenTranspose)
if(mode == miopenTranspose)
{
cpu_convolution_backward_weight(miopen::deref(convDesc).GetSpatialDimension(),
dout,
Expand Down Expand Up @@ -3178,7 +3164,7 @@ int ConvDriver<Tgpu, Tref>::RunBackwardWeightsCPU()
template <typename Tgpu, typename Tref>
int ConvDriver<Tgpu, Tref>::RunBackwardDataCPU()
{
if(miopen::deref(convDesc).mode == miopenTranspose)
if(mode == miopenTranspose)
{
cpu_convolution_forward(miopen::deref(convDesc).GetSpatialDimension(),
dout,
Expand Down Expand Up @@ -3275,7 +3261,7 @@ int ConvDriver<Tgpu, Tref>::RunBackwardDataGPUReference()
{
AutoPrepareForGpuReference naive_conv_enable;

auto ref_solution_id = miopen::deref(convDesc).mode == miopenTranspose
auto ref_solution_id = mode == miopenTranspose //
? miopen::solver::Id("ConvDirectNaiveConvFwd").Value()
: miopen::solver::Id("ConvDirectNaiveConvBwd").Value();
auto rc = miopenConvolutionBackwardDataImmediate(handle,
Expand Down Expand Up @@ -3324,7 +3310,7 @@ std::string ConvDriver<Tgpu, Tref>::GetVerificationCacheFileName(
{
std::ostringstream ss;

miopenConvolutionMode_t mode;
miopenConvolutionMode_t unused;

int spatial_dim = inflags.GetValueInt("spatial_dim");

Expand All @@ -3339,7 +3325,7 @@ std::string ConvDriver<Tgpu, Tref>::GetVerificationCacheFileName(
pads.data(),
conv_strides.data(),
conv_dilations.data(),
&mode);
&unused);

auto get_basename_string = [&]() {
switch(direction)
Expand Down Expand Up @@ -3471,8 +3457,7 @@ int ConvDriver<Tgpu, Tref>::VerifyForward()
}

std::cout << "Forward Convolution Verifies OK on " << (UseGPUReference() ? "GPU" : "CPU")
<< " reference (" << miopen::Value(MIOPEN_DRIVER_SUBNORM_PERCENTAGE{}) << ", "
<< " reference (" << error << ')' << std::endl;
<< " reference (" << error << " < " << tolerance << ')' << std::endl;

return 0;
}
Expand Down Expand Up @@ -3538,6 +3523,11 @@ int ConvDriver<Tgpu, Tref>::VerifyBackward()
// WrW deviation is ~twice worse than Bwd due to more FP computations involved,
// which means more roundings, so GPU amd CPU computations diverge more.
auto tolerance = 2 * GetDefaultTolerance();

// fp32 transposed convolutions show worse precision.
if(mode == miopenTranspose && std::is_same<Tgpu, float>::value)
tolerance *= 2;

// Winograd and iGemm WrW algorithms reveal bigger deviation than other algos.
if(is_wrw_winograd && std::is_same<Tgpu, float>::value)
{
Expand Down
60 changes: 22 additions & 38 deletions src/convolution_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,13 @@ static inline auto MakeFwdCtxAndProblem(miopenHandle_t handle,
const auto direction =
(conv.mode != miopenTranspose) ? Direction::Forward : Direction::BackwardData;

auto problem = (conv.mode != miopenTranspose) ? ProblemDescription{miopen::deref(xDesc),
miopen::deref(wDesc),
miopen::deref(yDesc),
conv,
direction}
: ProblemDescription{miopen::deref(yDesc),
miopen::deref(wDesc),
miopen::deref(xDesc),
conv,
direction};
/// \anchor transpose_convolutions_x_y_swapping
/// In transpose mode we exchange x with y. From the other hand, when Backward*
/// ProblemDescription instances are constructed, x and y shall be swapped as well.
/// As transpose mode swaps Forward with Backward AND x with y, the order of
/// ctor arguments remains the same.
auto problem = ProblemDescription{
miopen::deref(xDesc), miopen::deref(wDesc), miopen::deref(yDesc), conv, direction};

auto ctx = ExecutionContext{&miopen::deref(handle)};
problem.SetupFloats(ctx);
Expand All @@ -82,16 +79,9 @@ static inline auto MakeBwdCtxAndProblem(miopenHandle_t handle,
const auto direction =
(conv.mode != miopenTranspose) ? Direction::BackwardData : Direction::Forward;

auto problem = (conv.mode != miopenTranspose) ? ProblemDescription{miopen::deref(dyDesc),
miopen::deref(wDesc),
miopen::deref(dxDesc),
conv,
direction}
: ProblemDescription{miopen::deref(dxDesc),
miopen::deref(wDesc),
miopen::deref(dyDesc),
conv,
direction};
/// \ref transpose_convolutions_x_y_swapping
auto problem = ProblemDescription{
miopen::deref(dyDesc), miopen::deref(wDesc), miopen::deref(dxDesc), conv, direction};

auto ctx = ExecutionContext{&miopen::deref(handle)};
problem.SetupFloats(ctx);
Expand Down Expand Up @@ -490,7 +480,7 @@ miopenFindConvolutionForwardAlgorithm(miopenHandle_t handle,

miopen::debug::LogCmdFindConvolution(
xDesc, wDesc, convDesc, yDesc, miopen::debug::ConvDirection::Fwd, false);
/// workaround for previous trans conv logic

if(miopen::deref(convDesc).mode == miopenTranspose)
return miopen::try_([&] {
miopen::deref(convDesc).FindConvBwdDataAlgorithm(miopen::deref(handle),
Expand Down Expand Up @@ -563,7 +553,6 @@ extern "C" miopenStatus_t miopenConvolutionForward(miopenHandle_t handle,
miopen::debug::LogCmdConvolution(
xDesc, wDesc, convDesc, yDesc, miopen::debug::ConvDirection::Fwd, false);

/// workaround for previous trans conv logic
if(miopen::deref(convDesc).mode == miopenTranspose)
return miopen::try_([&] {
// It is guaranteed that enum values are equal, see conv_algo_name.cpp
Expand Down Expand Up @@ -1067,7 +1056,7 @@ miopenFindConvolutionBackwardDataAlgorithm(miopenHandle_t handle,

miopen::debug::LogCmdFindConvolution(
dxDesc, wDesc, convDesc, dyDesc, miopen::debug::ConvDirection::Bwd, false);
/// workaround for previous trans conv logic

if(miopen::deref(convDesc).mode == miopenTranspose)
return miopen::try_([&] {
miopen::deref(convDesc).FindConvFwdAlgorithm(miopen::deref(handle),
Expand Down Expand Up @@ -1141,7 +1130,6 @@ miopenConvolutionBackwardData(miopenHandle_t handle,
miopen::debug::LogCmdConvolution(
dxDesc, wDesc, convDesc, dyDesc, miopen::debug::ConvDirection::Bwd, false);

/// workaround for previous trans conv logic
if(miopen::deref(convDesc).mode == miopenTranspose)
return miopen::try_([&] {
// It is guaranteed that enum values are equal, see conv_algo_name.cpp
Expand Down Expand Up @@ -1247,15 +1235,13 @@ miopenFindConvolutionBackwardWeightsAlgorithm(miopenHandle_t handle,
xDesc, dwDesc, convDesc, dyDesc, miopen::debug::ConvDirection::WrW, false);

return miopen::try_([&] {
const auto trans = (miopen::deref(convDesc).mode == miopenTranspose);
miopen::deref(convDesc).FindConvBwdWeightsAlgorithm(
miopen::deref(handle),
/// workaround for previous trans conv logic
miopen::deref(convDesc).mode == miopenTranspose ? miopen::deref(xDesc)
: miopen::deref(dyDesc),
miopen::deref(convDesc).mode == miopenTranspose ? DataCast(x) : DataCast(dy),
miopen::deref(convDesc).mode == miopenTranspose ? miopen::deref(dyDesc)
: miopen::deref(xDesc),
miopen::deref(convDesc).mode == miopenTranspose ? DataCast(dy) : DataCast(x),
trans ? miopen::deref(xDesc) : miopen::deref(dyDesc),
trans ? DataCast(x) : DataCast(dy),
trans ? miopen::deref(dyDesc) : miopen::deref(xDesc),
trans ? DataCast(dy) : DataCast(x),
miopen::deref(dwDesc),
DataCast(dw),
requestAlgoCount,
Expand Down Expand Up @@ -1300,16 +1286,14 @@ miopenConvolutionBackwardWeights(miopenHandle_t handle,
xDesc, dwDesc, convDesc, dyDesc, miopen::debug::ConvDirection::WrW, false);

return miopen::try_([&] {
const auto trans = (miopen::deref(convDesc).mode == miopenTranspose);
miopen::deref(convDesc).ConvolutionBackwardWeights(
miopen::deref(handle),
alpha,
/// workaround for previous trans conv logic
miopen::deref(convDesc).mode == miopenTranspose ? miopen::deref(xDesc)
: miopen::deref(dyDesc),
miopen::deref(convDesc).mode == miopenTranspose ? DataCast(x) : DataCast(dy),
miopen::deref(convDesc).mode == miopenTranspose ? miopen::deref(dyDesc)
: miopen::deref(xDesc),
miopen::deref(convDesc).mode == miopenTranspose ? DataCast(dy) : DataCast(x),
trans ? miopen::deref(xDesc) : miopen::deref(dyDesc),
trans ? DataCast(x) : DataCast(dy),
trans ? miopen::deref(dyDesc) : miopen::deref(xDesc),
trans ? DataCast(dy) : DataCast(x),
algo,
beta,
miopen::deref(dwDesc),
Expand Down
74 changes: 42 additions & 32 deletions src/driver_arguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,21 +117,28 @@ std::string ConvArgsForMIOpenDriver(const miopen::TensorDescriptor& xDesc,
std::stringstream ss;
if(print_for_conv_driver)
ConvDataType(ss, xDesc);

/// \todo Dimensions (N, C, H, W, K..) are always parsed as if layout is NC(D)HW.
/// For other layouts, invalid values are printed.

if(convDesc.GetSpatialDimension() == 2)
{
ss << " -n " << xDesc.GetLengths()[0] // clang-format off
<< " -c " << xDesc.GetLengths()[1]
<< " -H " << xDesc.GetLengths()[2]
<< " -W " << xDesc.GetLengths()[3]
<< " -k " << wDesc.GetLengths()[0]
<< " -y " << wDesc.GetLengths()[2]
<< " -x " << wDesc.GetLengths()[3]
<< " -p " << convDesc.GetConvPads()[0]
<< " -q " << convDesc.GetConvPads()[1]
<< " -u " << convDesc.GetConvStrides()[0]
<< " -v " << convDesc.GetConvStrides()[1]
<< " -l " << convDesc.GetConvDilations()[0]
<< " -j " << convDesc.GetConvDilations()[1]; // clang-format on
ss << " -n " << xDesc.GetLengths()[0] //
<< " -c " << xDesc.GetLengths()[1] //
<< " -H " << xDesc.GetLengths()[2] //
<< " -W " << xDesc.GetLengths()[3] //
<< " -k "
<< (convDesc.mode == miopenTranspose //
? wDesc.GetLengths()[1] //
: wDesc.GetLengths()[0]) //
<< " -y " << wDesc.GetLengths()[2] //
<< " -x " << wDesc.GetLengths()[3] //
<< " -p " << convDesc.GetConvPads()[0] //
<< " -q " << convDesc.GetConvPads()[1] //
<< " -u " << convDesc.GetConvStrides()[0] //
<< " -v " << convDesc.GetConvStrides()[1] //
<< " -l " << convDesc.GetConvDilations()[0] //
<< " -j " << convDesc.GetConvDilations()[1];
std::string x_layout = xDesc.GetLayout("NCHW");
std::string w_layout = wDesc.GetLayout("NCHW");
std::string y_layout = yDesc.GetLayout("NCHW");
Expand All @@ -150,25 +157,28 @@ std::string ConvArgsForMIOpenDriver(const miopen::TensorDescriptor& xDesc,
}
else if(convDesc.GetSpatialDimension() == 3)
{
ss << " -n " << xDesc.GetLengths()[0] // clang-format off
<< " -c " << xDesc.GetLengths()[1]
<< " --in_d " << xDesc.GetLengths()[2]
<< " -H " << xDesc.GetLengths()[3]
<< " -W " << xDesc.GetLengths()[4]
<< " -k " << wDesc.GetLengths()[0]
<< " --fil_d " << wDesc.GetLengths()[2]
<< " -y " << wDesc.GetLengths()[3]
<< " -x " << wDesc.GetLengths()[4]
<< " --pad_d " << convDesc.GetConvPads()[0]
<< " -p " << convDesc.GetConvPads()[1]
<< " -q " << convDesc.GetConvPads()[2]
<< " --conv_stride_d " << convDesc.GetConvStrides()[0]
<< " -u " << convDesc.GetConvStrides()[1]
<< " -v " << convDesc.GetConvStrides()[2]
<< " --dilation_d " << convDesc.GetConvDilations()[0]
<< " -l " << convDesc.GetConvDilations()[1]
<< " -j " << convDesc.GetConvDilations()[2]
<< " --spatial_dim 3"; // clang-format on
ss << " -n " << xDesc.GetLengths()[0] //
<< " -c " << xDesc.GetLengths()[1] //
<< " --in_d " << xDesc.GetLengths()[2] //
<< " -H " << xDesc.GetLengths()[3] //
<< " -W " << xDesc.GetLengths()[4] //
<< " -k "
<< (convDesc.mode == miopenTranspose //
? wDesc.GetLengths()[1] //
: wDesc.GetLengths()[0]) //
<< " --fil_d " << wDesc.GetLengths()[2] //
<< " -y " << wDesc.GetLengths()[3] //
<< " -x " << wDesc.GetLengths()[4] //
<< " --pad_d " << convDesc.GetConvPads()[0] //
<< " -p " << convDesc.GetConvPads()[1] //
<< " -q " << convDesc.GetConvPads()[2] //
<< " --conv_stride_d " << convDesc.GetConvStrides()[0] //
<< " -u " << convDesc.GetConvStrides()[1] //
<< " -v " << convDesc.GetConvStrides()[2] //
<< " --dilation_d " << convDesc.GetConvDilations()[0] //
<< " -l " << convDesc.GetConvDilations()[1] //
<< " -j " << convDesc.GetConvDilations()[2] //
<< " --spatial_dim 3";
std::string x_layout = xDesc.GetLayout("NCDHW");
std::string w_layout = wDesc.GetLayout("NCDHW");
std::string y_layout = yDesc.GetLayout("NCDHW");
Expand Down
Loading

0 comments on commit 82f685c

Please sign in to comment.