Test non-packed inputs with naive reference convolution kernels #2394

Merged · 38 commits · Oct 12, 2023

Commits
94f5fd3
Squash commits together
amberhassaan Aug 20, 2023
5ab82f3
fix formatting. disable strides for fp8 kernel for now
amberhassaan Sep 23, 2023
be93522
fix the lengths of weight tensor
amberhassaan Sep 23, 2023
d85785b
use 64-bit integers for stride value
amberhassaan Sep 25, 2023
ee6abb3
Squash commits together
amberhassaan Aug 20, 2023
4fbcd77
fix test for non-packed strides
amberhassaan Sep 18, 2023
82e0ccf
fix format
amberhassaan Sep 18, 2023
7e8a258
Fix assertion check.
amberhassaan Sep 18, 2023
cadfb95
suppress cppcheck warning to test CI
junliume Sep 20, 2023
cde6e22
fix build and remove a check that prevents non-strided inputs
amberhassaan Sep 25, 2023
f7b606b
Merge remote-tracking branch 'origin/develop' into amber/non-packed-c…
amberhassaan Sep 25, 2023
0ad674b
Merge branch 'amber/non-packed-conv-ref-kern' into amber/tests-non-pa…
amberhassaan Sep 25, 2023
4db6cf8
Merge remote-tracking branch 'origin/develop' into amber/non-packed-c…
amberhassaan Sep 25, 2023
8af6d47
Merge branch 'amber/non-packed-conv-ref-kern' into amber/tests-non-pa…
amberhassaan Sep 25, 2023
e06c523
addressed comments. Moved common code into an include file
amberhassaan Sep 26, 2023
23d0066
Merge remote-tracking branch 'origin/develop' into amber/non-packed-c…
amberhassaan Sep 26, 2023
35c9072
Merge branch 'amber/non-packed-conv-ref-kern' into amber/tests-non-pa…
amberhassaan Sep 26, 2023
67d9a77
address comments
amberhassaan Sep 26, 2023
0f16c62
address review comments
amberhassaan Sep 28, 2023
c66da71
Merge remote-tracking branch 'origin/develop' into amber/non-packed-c…
amberhassaan Sep 28, 2023
8bb3a7f
Merge branch 'amber/non-packed-conv-ref-kern' into amber/tests-non-pa…
amberhassaan Sep 28, 2023
dec88a7
add more checks for strides
amberhassaan Sep 29, 2023
700b623
Merge remote-tracking branch 'origin/develop' into amber/non-packed-c…
amberhassaan Oct 2, 2023
1253aed
Merge branch 'amber/non-packed-conv-ref-kern' into amber/tests-non-pa…
amberhassaan Oct 2, 2023
85e8a62
fix test now that strides are supported
amberhassaan Oct 3, 2023
39eee97
use C++17 to compile HIP Kernels
amberhassaan Oct 4, 2023
f60f182
Merge remote-tracking branch 'origin/develop' into amber/non-packed-c…
amberhassaan Oct 4, 2023
e7c9d03
Merge branch 'amber/non-packed-conv-ref-kern' into amber/tests-non-pa…
amberhassaan Oct 4, 2023
57fdf6e
address comments
amberhassaan Oct 4, 2023
e8dbc49
Merge branch 'develop' into amber/tests-non-packed-conv
junliume Oct 8, 2023
b5d8b0f
address comments
amberhassaan Oct 4, 2023
0fe1a55
remove OpenCL code due to deprecation
amberhassaan Oct 4, 2023
1cab2c3
Merge branch 'amber/non-packed-conv-ref-kern' into amber/tests-non-pa…
amberhassaan Oct 9, 2023
505f6ab
fix build
amberhassaan Oct 9, 2023
c34c72a
fix Clang Format issue
junliume Oct 9, 2023
e146046
Merge remote-tracking branch 'origin/develop' into amber/tests-non-pa…
amberhassaan Oct 11, 2023
220e423
resolve conflicts and handle feedback
amberhassaan Oct 11, 2023
baa2da4
advance fin
amberhassaan Oct 11, 2023
1 change: 1 addition & 0 deletions driver/random.hpp
@@ -91,6 +91,7 @@ inline T gen_0_to_B(T B)
template <typename T>
inline T gen_A_to_B(T A, T B)
{
assert(B > A);
return gen_0_to_B(B - A) + A;
}

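The new assert guards the precondition that the range is non-empty. A minimal sketch of the surrounding generator, assuming gen_0_to_B(B) yields a value in [0, B) (the stand-in below is illustrative and integral-only, not the driver's real implementation):

#include <cassert>
#include <cstdlib>

// Stand-in for the driver's generator; assumed to return a value in [0, B).
template <typename T>
inline T gen_0_to_B(T B)
{
    return static_cast<T>(std::rand() % B);
}

template <typename T>
inline T gen_A_to_B(T A, T B)
{
    assert(B > A); // with B <= A, B - A wraps for unsigned T or yields an empty range
    return gen_0_to_B(B - A) + A; // shifts [0, B - A) into [A, B)
}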
2 changes: 1 addition & 1 deletion fin
Submodule fin updated from 26b5c3 to afc1a8
4 changes: 4 additions & 0 deletions src/include/miopen/convolution.hpp
@@ -36,6 +36,7 @@
#include <miopen/names.hpp>
#include <miopen/invoke_params.hpp>
#include <miopen/invoker.hpp>
#include <miopen/conv/tensors.hpp>

#include <nlohmann/json_fwd.hpp>

@@ -404,6 +405,9 @@ struct ConvolutionDescriptor : miopenConvolutionDescriptor

friend void to_json(nlohmann::json& json, const ConvolutionDescriptor& conv);
friend void from_json(const nlohmann::json& json, ConvolutionDescriptor& conv);

private:
void ValidateTensors(const ConvTensors& conv_tensors) const;
};

void ConvolutionBackwardBias(const Handle& handle,
6 changes: 3 additions & 3 deletions src/kernels/gpu_reference_kernel/naive_conv.cpp
@@ -126,9 +126,9 @@ inline __device__ __host__ int8_t cast_to(const int32_t& val)
/// composable_kernel (CK) treats G dimension. Which is why nchw should be ngchw,
/// and nhwc should be nhwgc. Same follows for the 3D case.
///
-/// - strides here are in the little-endian order, i.e., for NHWC, stride for N is
-///   at index 3 while stride for C is at index 0. This is reverse of how strides are
-///   stored in tensor descriptors, which are big-endian.
+/// - strides here are stored right to left, i.e., for NHWC, stride for N is
+///   at index 3 while stride for C is at index 0. This is different from how the
+///   tensor descriptors store strides, which is always NCHW order, left-to-right.

template <bool ASSUME_PACKED, typename src_data_t, typename acc_data_t, typename dst_data_t>
inline __device__ void naive_conv_fwd_nchw(const src_data_t* __restrict__ p_in,
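To make the right-to-left convention concrete, a small hedged sketch (illustrative values only, not MIOpen code) of packed NHWC strides in the kernel's order versus the descriptor's NCHW-order layout:

#include <array>
#include <cstdint>

int main()
{
    // NHWC lengths {N, H, W, C}
    const std::array<int64_t, 4> lens = {2, 8, 8, 16};

    // Kernel-side order is right to left: index 0 is C (fastest), index 3 is N.
    std::array<int64_t, 4> kernel_strides{};
    kernel_strides[0] = 1;                           // C
    kernel_strides[1] = kernel_strides[0] * lens[3]; // W = 16
    kernel_strides[2] = kernel_strides[1] * lens[2]; // H = 128
    kernel_strides[3] = kernel_strides[2] * lens[1]; // N = 1024

    // A tensor descriptor stores the same strides left to right in NCHW
    // order instead: {N, C, H, W} = {1024, 1, 128, 16}.
    return 0;
}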
123 changes: 88 additions & 35 deletions src/ocl/convolutionocl.cpp
@@ -287,30 +287,6 @@ void ConvolutionDescriptor::FindConvFwdAlgorithm(Handle& handle,

namespace {

-void ValidateConvTensors(const ConvTensors& tensors)
-{
-    const auto invalid_buffers =
-        tensors.x == nullptr || tensors.w == nullptr || tensors.y == nullptr;
-
-    const auto tensor_sizes_not_matched = tensors.xDesc.GetSize() != tensors.yDesc.GetSize() ||
-                                          tensors.xDesc.GetSize() != tensors.wDesc.GetSize();
-
-    const auto trivial_tensor_types_not_matched =
-        tensors.xDesc.GetType() != tensors.yDesc.GetType() && tensors.xDesc.GetType() != miopenInt8;
-
-    // if(xDesc.GetLengths()[1] != wDesc.GetLengths()[1]) {
-    //    MIOPEN_THROW(miopenStatusBadParm);
-    //}
-
-    const auto x_tensor_invalid = tensors.xDesc.GetSize() < 3;
-
-    const auto bad_parameters = invalid_buffers || tensor_sizes_not_matched ||
-                                trivial_tensor_types_not_matched || x_tensor_invalid;
-
-    if(bad_parameters)
-        MIOPEN_THROW(miopenStatusBadParm);
-}

void ValidateAlphaBeta(const void* alpha, const void* beta)
{
if(!float_equal(*(static_cast<const float*>(alpha)), 1.0) ||
@@ -401,6 +377,88 @@ static void ConvForwardCheckNumerics(const Handle& handle,
}
}

void ConvolutionDescriptor::ValidateTensors(const ConvTensors& tensors) const
{

// Group stride in current TensorDescriptor is implicit. When invoking kernels,
// we need to add the group dimension G and compute its stride. We want the stride
// left of C to be a multiple of group count G. e.g. for NCHW, the stride for N
// should be a multiple of G so that we can compute the strides for NGCHW
auto bad_group_stride = [this](const TensorDescriptor& td) {
auto l = td.GetLayout_t();
int g_stride_index = -1;
if(l == miopenTensorNCHW || l == miopenTensorNCDHW)
{
g_stride_index = 0; // stride index for N;
}
else if(l == miopenTensorNHWC || l == miopenTensorNDHWC)
{
// stride index for W. Normally this would be 2nd-last stride but we store
// strides in NCHW order for some weird reason.
g_stride_index = td.GetStrides().size() - 1;
}
else
{
MIOPEN_THROW(miopenStatusInternalError, "Layout not supported for grouped convolution");
}

if(g_stride_index != -1)
{
return (td.GetStrides()[g_stride_index] % this->group_count) != 0;
}

return false;
};

// invalid_buffers
if(tensors.x == nullptr || tensors.w == nullptr || tensors.y == nullptr)
{
MIOPEN_THROW(miopenStatusBadParm, "One of the convolution tensors is null");
}

// x_tensor_invalid =
if(tensors.xDesc.GetSize() < 3)
{
MIOPEN_THROW(miopenStatusBadParm, "input tensor's number of dimensions is wrong");
}

// tensor_sizes_not_matched =
if(tensors.xDesc.GetSize() != tensors.yDesc.GetSize() ||
tensors.xDesc.GetSize() != tensors.wDesc.GetSize())
{
MIOPEN_THROW(miopenStatusBadParm,
"number of dimensions mismatch between input, output and weights tensors");
}

// trivial_tensor_types_not_matched =
if(tensors.xDesc.GetType() != tensors.yDesc.GetType() &&
tensors.xDesc.GetType() != miopenInt8 && tensors.xDesc.GetType() != miopenInt8x4)
{
MIOPEN_THROW(miopenStatusBadParm, "input/output tensor data types do not match");
}

// check for bad_group_stride. This applies for input and output only. There
// is no check for weight tensor currently.
// no need to check for group_count == 1

if((this->group_count > 1) && bad_group_stride(tensors.xDesc))
{
MIOPEN_THROW(
miopenStatusBadParm,
"Invalid input tensor strides. Channel stride must be a multiple of group count");
}
if((this->group_count > 1) && bad_group_stride(tensors.yDesc))
{
MIOPEN_THROW(
miopenStatusBadParm,
"Invalid output tensor strides. Channel stride must be a multiple of group count");
}

// if(xDesc.GetLengths()[1] != wDesc.GetLengths()[1]) {
// MIOPEN_THROW(miopenStatusBadParm);
//}
}
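As a hedged illustration of the divisibility rule (the derivation of the G stride below is inferred from the comment in the lambda above, not lifted from MIOpen internals):

#include <cstdint>
#include <stdexcept>
#include <vector>

// Split an NCHW stride vector {sN, sC, sH, sW} into an NGCHW view for
// group_count groups. The G dimension is carved out of each N slice, so
// sN must divide evenly by the group count.
std::vector<int64_t> SplitNchwIntoNgchw(const std::vector<int64_t>& s, int64_t group_count)
{
    if(s[0] % group_count != 0)
        throw std::runtime_error("N stride must be a multiple of group count");
    return {s[0], s[0] / group_count, s[1], s[2], s[3]}; // {N, G, C/G, H, W} strides
}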

void ConvolutionDescriptor::ConvolutionForward(Handle& handle,
const void* alpha,
const TensorDescriptor& xDesc,
@@ -416,13 +474,8 @@ void ConvolutionDescriptor::ConvolutionForward(Handle& handle,
{
MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize);

-    if(!(xDesc.IsPacked() && wDesc.IsPacked() && yDesc.IsPacked()))
-    {
-        MIOPEN_THROW(miopenStatusNotImplemented, "Only fully packed tensors are supported");
-    }

const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y};
-    ValidateConvTensors(tensors);
+    ValidateTensors(tensors);
ValidateAlphaBeta(alpha, beta);

ConvForwardCheckNumerics(handle, tensors, [&]() {
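With the packed-only guard removed above, non-packed tensors can now reach this path. A hypothetical usage sketch via the public descriptor API (the padding values are illustrative, not from the PR):

#include <miopen/miopen.h>

// Describe a 4-D float tensor whose W dimension is padded from 28 to 30
// elements, i.e. a non-packed layout that the old check used to reject.
void MakeStridedDesc(miopenTensorDescriptor_t desc)
{
    int lens[4]    = {16, 32, 28, 28};               // N, C, H, W
    int strides[4] = {32 * 28 * 30, 28 * 30, 30, 1}; // NCHW order, W padded
    miopenSetTensorDescriptor(desc, miopenFloat, 4, lens, strides);
}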
@@ -735,7 +788,7 @@ void ConvolutionDescriptor::ConvolutionForwardImmediate(Handle& handle,
MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
const auto tensors = ConvFwdTensors{xDesc, x, wDesc, w, yDesc, y};

-    ValidateConvTensors(tensors);
+    ValidateTensors(tensors);
if(!solver_id.IsValid())
MIOPEN_THROW(miopenStatusBadParm);

Expand Down Expand Up @@ -871,7 +924,7 @@ void ConvolutionDescriptor::ConvolutionBackwardData(Handle& handle,

auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx};

-    ValidateConvTensors(tensors);
+    ValidateTensors(tensors);
ValidateAlphaBeta(alpha, beta);

ConvBwdCheckNumerics(handle, tensors, beta, [&]() {
Expand Down Expand Up @@ -937,7 +990,7 @@ void ConvolutionDescriptor::ConvolutionBackwardImmediate(Handle& handle,
MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
auto tensors = ConvBwdTensors{dyDesc, dy, wDesc, w, dxDesc, dx};

-    ValidateConvTensors(tensors);
+    ValidateTensors(tensors);

static const float beta = 0.0f;
ConvBwdCheckNumerics(handle, tensors, &beta, [&]() {
Expand Down Expand Up @@ -1071,7 +1124,7 @@ void ConvolutionDescriptor::ConvolutionBackwardWeights(const Handle& handle,
{
MIOPEN_LOG_I("algo = " << algo << ", workspace = " << workSpaceSize);
decltype(auto) tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw};
-    ValidateConvTensors(tensors);
+    ValidateTensors(tensors);
ValidateAlphaBeta(alpha, beta);

if(xDesc.GetType() == miopenInt8)
Expand Down Expand Up @@ -1134,7 +1187,7 @@ void ConvolutionDescriptor::ConvolutionWrwImmediate(Handle& handle,
{
MIOPEN_LOG_I("solver_id = " << solver_id.ToString() << ", workspace = " << workSpaceSize);
auto tensors = ConvWrwTensors{dyDesc, dy, xDesc, x, dwDesc, dw};
-    ValidateConvTensors(tensors);
+    ValidateTensors(tensors);

if(xDesc.GetType() == miopenInt8)
MIOPEN_THROW(miopenStatusBadParm);
Expand Down
4 changes: 4 additions & 0 deletions src/solver/conv_direct_naive_conv.cpp
@@ -111,11 +111,15 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS);
std::string ConvDirectNaiveConvKernelName(const ProblemDescription& problem)
{
std::ostringstream kernel_name;

/// \todo remove packed reference convolution kernels --amberhassaan
#ifndef NDEBUG // enable in debug mode only
if(miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS()))
{
kernel_name << "naive_conv_packed_";
}
else
#endif
{
kernel_name << "naive_conv_nonpacked_";
}
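Usage note (inferred from the code above): only debug builds, where NDEBUG is undefined, honor the MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS environment variable and emit the naive_conv_packed_* kernel names; release builds always select the naive_conv_nonpacked_* variants.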
8 changes: 2 additions & 6 deletions src/solver/conv_direct_naive_conv_bwd.cpp
@@ -134,12 +134,8 @@ ConvSolution ConvDirectNaiveConvBwd::GetSolution(const ExecutionContext& ctx,
kernel.l_wk.push_back(1);
kernel.l_wk.push_back(1);

-    const auto is_f8 = [&]() {
-        if(kernel.kernel_file == "fp8_naive_conv.cpp")
-            return true;
-        else
-            return false;
-    }();
+    const auto is_f8 = (kernel.kernel_file == "fp8_naive_conv.cpp");

kernel.comp_options = ConvDirectNaiveConvCompileOption(ctx, problem);

int G_stride_idx = conv_internal::GetGroupStrideIndex(problem);
8 changes: 2 additions & 6 deletions src/solver/conv_direct_naive_conv_fwd.cpp
@@ -122,12 +122,6 @@ ConvSolution ConvDirectNaiveConvFwd::GetSolution(const ExecutionContext& ctx,
KernelInfo kernel;

kernel.kernel_file = ConvDirectNaiveConvKernelFile(ctx, problem);
-    const auto is_f8 = [&]() {
-        if(kernel.kernel_file == "fp8_naive_conv.cpp")
-            return true;
-        else
-            return false;
-    }();
kernel.kernel_name = ConvDirectNaiveConvKernelName(problem);
kernel.g_wk.clear();

@@ -139,6 +133,8 @@ ConvSolution ConvDirectNaiveConvFwd::GetSolution(const ExecutionContext& ctx,
kernel.l_wk.push_back(1);
kernel.l_wk.push_back(1);

+    const auto is_f8 = (kernel.kernel_file == "fp8_naive_conv.cpp");

kernel.comp_options = ConvDirectNaiveConvCompileOption(ctx, problem);

int G_stride_idx = conv_internal::GetGroupStrideIndex(problem);
8 changes: 2 additions & 6 deletions src/solver/conv_direct_naive_conv_wrw.cpp
@@ -121,13 +121,9 @@ ConvSolution ConvDirectNaiveConvWrw::GetSolution(const ExecutionContext& ctx,
kernel.l_wk.push_back(1);
kernel.l_wk.push_back(1);

+    const auto is_f8 = (kernel.kernel_file == "fp8_naive_conv.cpp");

kernel.comp_options = ConvDirectNaiveConvCompileOption(ctx, problem);
-    const auto is_f8 = [&]() {
-        if(kernel.kernel_file == "fp8_naive_conv.cpp")
-            return true;
-        else
-            return false;
-    }();

int G_stride_idx = conv_internal::GetGroupStrideIndex(problem);
