Test non-packed inputs with naive reference convolution kernels (#2394)
amberhassaan authored Oct 12, 2023
1 parent b45e54d commit defb1b0
Showing 11 changed files with 221 additions and 332 deletions.
driver/random.hpp (1 addition & 0 deletions)
@@ -91,6 +91,7 @@ inline T gen_0_to_B(T B)
template <typename T>
inline T gen_A_to_B(T A, T B)
{
assert(B > A);
return gen_0_to_B(B - A) + A;
}

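A quick illustration of why the new assert matters: gen_A_to_B feeds B - A straight into gen_0_to_B, so a swapped or empty range would degenerate silently without it. A minimal sketch of the failure mode, using a hypothetical modulo-based gen_0_to_B as a stand-in for the driver's implementation:

#include <cassert>
#include <cstdlib>

// Hypothetical stand-in for the driver's gen_0_to_B; assumed to draw from [0, B).
template <typename T>
inline T gen_0_to_B(T B)
{
    return static_cast<T>(std::rand() % B); // modulo by zero if B == 0
}

template <typename T>
inline T gen_A_to_B(T A, T B)
{
    assert(B > A); // without this, a caller that mixes up A and B feeds a
                   // non-positive range into gen_0_to_B
    return gen_0_to_B(B - A) + A;
}

int main()
{
    int v = gen_A_to_B(-5, 5); // fine: v lies in [-5, 5)
    (void)v;
    // gen_A_to_B(5, -5);      // would fire the assert in a debug build
    return 0;
}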
fin (1 addition & 1 deletion)
Submodule fin updated from 26b5c3 to afc1a8
src/include/miopen/convolution.hpp (4 additions & 0 deletions)
@@ -36,6 +36,7 @@
#include <miopen/names.hpp>
#include <miopen/invoke_params.hpp>
#include <miopen/invoker.hpp>
#include <miopen/conv/tensors.hpp>

#include <nlohmann/json_fwd.hpp>

@@ -404,6 +405,9 @@ struct ConvolutionDescriptor : miopenConvolutionDescriptor

friend void to_json(nlohmann::json& json, const ConvolutionDescriptor& conv);
friend void from_json(const nlohmann::json& json, ConvolutionDescriptor& conv);

private:
void ValidateTensors(const ConvTensors& conv_tensors) const;
};

void ConvolutionBackwardBias(const Handle& handle,
src/kernels/gpu_reference_kernel/naive_conv.cpp (3 additions & 3 deletions)
@@ -126,9 +126,9 @@ inline __device__ __host__ int8_t cast_to(const int32_t& val)
/// composable_kernel (CK) treats the G dimension, which is why nchw should be ngchw,
/// and nhwc should be nhwgc. The same holds for the 3D case.
///
/// - strides here are in the little-endian order, i.e., for NHWC, stride for N is
/// at index 3 while stride for C is at index 0. This is reverse of how strides are
/// stored in tensor descriptors, which are big-endian.
/// - strides here are stored right to left, i.e., for NHWC, the stride for N is
/// at index 3 while the stride for C is at index 0. This differs from the tensor
/// descriptors, which always store strides in NCHW order, left to right.

template <bool ASSUME_PACKED, typename src_data_t, typename acc_data_t, typename dst_data_t>
inline __device__ void naive_conv_fwd_nchw(const src_data_t* __restrict__ p_in,
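To make the reworded stride comment concrete, here is a small standalone sketch (not MIOpen code) contrasting the two conventions for a packed NHWC tensor: the kernel-side array stores strides right to left, so index 0 belongs to C and index 3 to N, while a tensor descriptor lists strides in NCHW order no matter what the memory layout is.

#include <array>
#include <cstdint>
#include <iostream>

int main()
{
    // A packed NHWC tensor with N=2, H=4, W=4, C=8.
    const int64_t N = 2, H = 4, W = 4, C = 8;
    (void)N;

    // Kernel-side convention (right to left): index 0 is the fastest-varying
    // dimension, which is C for NHWC.
    std::array<int64_t, 4> kernel_strides = {
        1,        // [0] C
        C,        // [1] W
        W * C,    // [2] H
        H * W * C // [3] N
    };

    // Descriptor convention: strides listed in NCHW order, left to right,
    // regardless of layout.
    std::array<int64_t, 4> desc_strides = {
        H * W * C, // N
        1,         // C
        W * C,     // H
        C          // W
    };

    // Both report the same stride for N; only the indexing differs.
    std::cout << kernel_strides[3] << " == " << desc_strides[0] << '\n';
}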
src/ocl/convolutionocl.cpp (88 additions & 35 deletions)
@@ -287,30 +287,6 @@ void ConvolutionDescriptor::FindConvFwdAlgorithm(Handle& handle,

namespace {

void ValidateConvTensors(const ConvTensors& tensors)
{
const auto invalid_buffers =
tensors.x == nullptr || tensors.w == nullptr || tensors.y == nullptr;

const auto tensor_sizes_not_matched = tensors.xDesc.GetSize() != tensors.yDesc.GetSize() ||
tensors.xDesc.GetSize() != tensors.wDesc.GetSize();

const auto trivial_tensor_types_not_matched =
tensors.xDesc.GetType() != tensors.yDesc.GetType() && tensors.xDesc.GetType() != miopenInt8;

// if(xDesc.GetLengths()[1] != wDesc.GetLengths()[1]) {
// MIOPEN_THROW(miopenStatusBadParm);
//}

const auto x_tensor_invalid = tensors.xDesc.GetSize() < 3;

const auto bad_parameters = invalid_buffers || tensor_sizes_not_matched ||
trivial_tensor_types_not_matched || x_tensor_invalid;

if(bad_parameters)
MIOPEN_THROW(miopenStatusBadParm);
}

void ValidateAlphaBeta(const void* alpha, const void* beta)
{
if(!float_equal(*(static_cast<const float*>(alpha)), 1.0) ||
@@ -401,6 +377,88 @@ static void ConvForwardCheckNumerics(const Handle& handle,
}
}

void ConvolutionDescriptor::ValidateTensors(const ConvTensors& tensors) const
{

// The group stride in the current TensorDescriptor is implicit. When invoking
// kernels, we need to add the group dimension G and compute its stride. We want
// the stride to the left of C to be a multiple of the group count G, e.g., for
// NCHW the stride for N should be a multiple of G so that we can compute the
// strides for NGCHW.
auto bad_group_stride = [this](const TensorDescriptor& td) {
auto l = td.GetLayout_t();
int g_stride_index = -1;
if(l == miopenTensorNCHW || l == miopenTensorNCDHW)
{
g_stride_index = 0; // stride index for N
}
else if(l == miopenTensorNHWC || l == miopenTensorNDHWC)
{
// stride index for W. Normally this would be the 2nd-to-last stride, but we
// store strides in NCHW order regardless of layout.
g_stride_index = td.GetStrides().size() - 1;
}
else
{
MIOPEN_THROW(miopenStatusInternalError, "Layout not supported for grouped convolution");
}

if(g_stride_index != -1)
{
return (td.GetStrides()[g_stride_index] % this->group_count) != 0;
}

return false;
};

// invalid_buffers
if(tensors.x == nullptr || tensors.w == nullptr || tensors.y == nullptr)
{
MIOPEN_THROW(miopenStatusBadParm, "One of the convolution tensors is null");
}

// x_tensor_invalid
if(tensors.xDesc.GetSize() < 3)
{
MIOPEN_THROW(miopenStatusBadParm, "input tensor's number of dimensions is wrong");
}

// tensor_sizes_not_matched
if(tensors.xDesc.GetSize() != tensors.yDesc.GetSize() ||
tensors.xDesc.GetSize() != tensors.wDesc.GetSize())
{
MIOPEN_THROW(miopenStatusBadParm,
"number of dimensions mismatch between input, output and weights tensors");
}

// trivial_tensor_types_not_matched
if(tensors.xDesc.GetType() != tensors.yDesc.GetType() &&
tensors.xDesc.GetType() != miopenInt8 && tensors.xDesc.GetType() != miopenInt8x4)
{
MIOPEN_THROW(miopenStatusBadParm, "input/output tensor data types do not match");
}

// Check for a bad group stride. This applies to the input and output tensors
// only; there is currently no check for the weights tensor. The check is
// unnecessary when group_count == 1.

if((this->group_count > 1) && bad_group_stride(tensors.xDesc))
{
MIOPEN_THROW(
miopenStatusBadParm,
"Invalid input tensor strides. Channel stride must be a multiple of group count");
}
if((this->group_count > 1) && bad_group_stride(tensors.yDesc))
{
MIOPEN_THROW(
miopenStatusBadParm,
"Invalid output tensor strides. Channel stride must be a multiple of group count");
}

// if(xDesc.GetLengths()[1] != wDesc.GetLengths()[1]) {
// MIOPEN_THROW(miopenStatusBadParm);
//}
}
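The group-stride rule is easiest to see with numbers: for an NCHW input, the stride of N is what sits to the left of C, and splitting C into G groups only works if that stride divides evenly by G. A standalone sketch of the same divisibility test (assumed semantics, not the MIOpen API):

#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors the NCHW branch of bad_group_stride above: the stride left of C
// (the N stride, at index 0 in NCHW order) must be a multiple of group_count.
bool bad_group_stride_nchw(const std::vector<int64_t>& strides, int group_count)
{
    return (strides[0] % group_count) != 0;
}

int main()
{
    const int64_t C = 8, H = 3, W = 3;
    // Packed NCHW strides in NCHW order: {C*H*W, H*W, W, 1}.
    const std::vector<int64_t> strides = {C * H * W, H * W, W, 1}; // N stride = 72

    std::cout << std::boolalpha
              << bad_group_stride_nchw(strides, 4) << '\n'  // false: 72 % 4 == 0
              << bad_group_stride_nchw(strides, 5) << '\n'; // true:  72 % 5 != 0
}

Note this only demonstrates the stride check in isolation; a real grouped convolution would also need C itself divisible by the group count.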

void ConvolutionDescriptor::ConvolutionForward(Handle& handle,
const void* alpha,
const TensorDescriptor& xDesc,
@@ -416,13 +474,8 @@ void ConvolutionDescriptor::ConvolutionForward(Handle& handle,
{
MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize);

if(!(xDesc.IsPacked() && wDesc.IsPacked() && yDesc.IsPacked()))
{
MIOPEN_THROW(miopenStatusNotImplemented, "Only fully packed tensors are supported");
}

const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y};
ValidateConvTensors(tensors);
ValidateTensors(tensors);
ValidateAlphaBeta(alpha, beta);

ConvForwardCheckNumerics(handle, tensors, [&]() {
@@ -735,7 +788,7 @@ void ConvolutionDescriptor::ConvolutionForwardImmediate(Handle& handle,
MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y};

ValidateConvTensors(tensors);
ValidateTensors(tensors);
if(!solver_id.IsValid())
MIOPEN_THROW(miopenStatusBadParm);

@@ -871,7 +924,7 @@ void ConvolutionDescriptor::ConvolutionBackwardData(Handle& handle,

auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx};

ValidateConvTensors(tensors);
ValidateTensors(tensors);
ValidateAlphaBeta(alpha, beta);

ConvBwdCheckNumerics(handle, tensors, beta, [&]() {
@@ -937,7 +990,7 @@ void ConvolutionDescriptor::ConvolutionBackwardImmediate(Handle& handle,
MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx};

ValidateConvTensors(tensors);
ValidateTensors(tensors);

static const float beta = 0.0f;
ConvBwdCheckNumerics(handle, tensors, &beta, [&]() {
@@ -1071,7 +1124,7 @@ void ConvolutionDescriptor::ConvolutionBackwardWeights(const Handle& handle,
{
MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize);
decltype(auto) tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw};
ValidateConvTensors(tensors);
ValidateTensors(tensors);
ValidateAlphaBeta(alpha, beta);

if(xDesc.GetType() == miopenInt8)
@@ -1134,7 +1187,7 @@ void ConvolutionDescriptor::ConvolutionWrwImmediate(Handle& handle,
{
MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
auto tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw};
ValidateConvTensors(tensors);
ValidateTensors(tensors);

if(xDesc.GetType() == miopenInt8)
MIOPEN_THROW(miopenStatusBadParm);
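The net effect of the changes in this file: ConvolutionForward no longer rejects non-packed tensors up front, so descriptors whose strides carry padding can reach the non-packed-aware reference kernels, subject to ValidateTensors. A sketch of what non-packed means for an NCHW descriptor (illustrative numbers only, no MIOpen API):

#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    // NCHW lengths: N=2, C=8, H=4, W=4.
    const std::vector<int64_t> lens = {2, 8, 4, 4};

    // Packed strides: each stride is the product of the lengths to its right.
    const std::vector<int64_t> packed = {8 * 4 * 4, 4 * 4, 4, 1}; // {128, 16, 4, 1}

    // Non-packed: each row padded from W=4 elements to a pitch of 5, with the
    // outer strides rebuilt on top of that pitch.
    const std::vector<int64_t> nonpacked = {8 * 4 * 5, 4 * 5, 5, 1}; // {160, 20, 5, 1}

    // The padded buffer spans more elements than the tensor logically holds,
    // which is exactly the case the removed IsPacked() guard used to reject.
    std::cout << "packed span:     " << packed[0] * lens[0] << " elements\n"     // 256
              << "non-packed span: " << nonpacked[0] * lens[0] << " elements\n"; // 320
}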
src/solver/conv_direct_naive_conv.cpp (4 additions & 0 deletions)
@@ -111,11 +111,15 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS);
std::string ConvDirectNaiveConvKernelName(const ProblemDescription& problem)
{
std::ostringstream kernel_name;

/// \todo remove packed reference convolution kernels --amberhassaan
#ifndef NDEBUG // enable in debug mode only
if(miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS()))
{
kernel_name << "naive_conv_packed_";
}
else
#endif
{
kernel_name << "naive_conv_nonpacked_";
}
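For reference, the selection above reduces to a small gate: release builds (with NDEBUG defined) always pick the non-packed reference kernels, while debug builds can force the packed variants back on through the environment switch. A simplified sketch of that logic; the env-var helper below is a stand-in, not MIOpen's IsEnabled:

#include <cstdlib>
#include <string>

// Stand-in for miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS()).
static bool packed_kernels_forced()
{
    const char* v = std::getenv("MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS");
    return v != nullptr && std::string(v) == "1";
}

std::string kernel_name_prefix()
{
#ifndef NDEBUG // the debug-only escape hatch added by this commit
    if(packed_kernels_forced())
        return "naive_conv_packed_";
#endif
    return "naive_conv_nonpacked_";
}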
src/solver/conv_direct_naive_conv_bwd.cpp (2 additions & 6 deletions)
@@ -134,12 +134,8 @@ ConvSolution ConvDirectNaiveConvBwd::GetSolution(const ExecutionContext& ctx,
kernel.l_wk.push_back(1);
kernel.l_wk.push_back(1);

const auto is_f8 = [&]() {
if(kernel.kernel_file == "fp8_naive_conv.cpp")
return true;
else
return false;
}();
const auto is_f8 = (kernel.kernel_file == "fp8_naive_conv.cpp");

kernel.comp_options = ConvDirectNaiveConvCompileOption(ctx, problem);

int G_stride_idx = conv_internal::GetGroupStrideIndex(problem);
src/solver/conv_direct_naive_conv_fwd.cpp (2 additions & 6 deletions)
@@ -122,12 +122,6 @@ ConvSolution ConvDirectNaiveConvFwd::GetSolution(const ExecutionContext& ctx,
KernelInfo kernel;

kernel.kernel_file = ConvDirectNaiveConvKernelFile(ctx, problem);
const auto is_f8 = [&]() {
if(kernel.kernel_file == "fp8_naive_conv.cpp")
return true;
else
return false;
}();
kernel.kernel_name = ConvDirectNaiveConvKernelName(problem);
kernel.g_wk.clear();

@@ -139,6 +133,8 @@ ConvSolution ConvDirectNaiveConvFwd::GetSolution(const ExecutionContext& ctx,
kernel.l_wk.push_back(1);
kernel.l_wk.push_back(1);

const auto is_f8 = (kernel.kernel_file == "fp8_naive_conv.cpp");

kernel.comp_options = ConvDirectNaiveConvCompileOption(ctx, problem);

int G_stride_idx = conv_internal::GetGroupStrideIndex(problem);
src/solver/conv_direct_naive_conv_wrw.cpp (2 additions & 6 deletions)
@@ -121,13 +121,9 @@ ConvSolution ConvDirectNaiveConvWrw::GetSolution(const ExecutionContext& ctx,
kernel.l_wk.push_back(1);
kernel.l_wk.push_back(1);

const auto is_f8 = (kernel.kernel_file == "fp8_naive_conv.cpp");

kernel.comp_options = ConvDirectNaiveConvCompileOption(ctx, problem);
const auto is_f8 = [&]() {
if(kernel.kernel_file == "fp8_naive_conv.cpp")
return true;
else
return false;
}();

int G_stride_idx = conv_internal::GetGroupStrideIndex(problem);
