diff --git a/src/kernels/MIOpenPadConstantFwd.cpp b/src/kernels/MIOpenPadConstantFwd.cpp
index fb288fe9a5..f5be34efe4 100644
--- a/src/kernels/MIOpenPadConstantFwd.cpp
+++ b/src/kernels/MIOpenPadConstantFwd.cpp
@@ -51,14 +51,13 @@ get5DValueAt(const T* x, const size_t* x_dims, size_t n, size_t c, size_t d, siz
              c * x_dims[2] * x_dims[3] * x_dims[4] + d * x_dims[3] * x_dims[4] + h * x_dims[4] + w];
 }
 
-extern "C" __global__ void PadConstantFwdContiguous(
-    const INPUT_TYPE* __restrict__ x,
-    OUTPUT_TYPE* __restrict__ y,
-    const size_t* __restrict__ x_dims,
-    const size_t* __restrict__ y_dims,
-    const size_t* __restrict__ padding,
-    const size_t output_size,
-    float value)
+extern "C" __global__ void PadConstantFwdContiguous(const INPUT_TYPE* __restrict__ x,
+                                                    OUTPUT_TYPE* __restrict__ y,
+                                                    const size_t* __restrict__ x_dims,
+                                                    const size_t* __restrict__ y_dims,
+                                                    const size_t* __restrict__ padding,
+                                                    const size_t output_size,
+                                                    float value)
 {
     // size_t gid = get_global_id(0);
     // if (gid >= output_size) return;
@@ -83,7 +82,7 @@ extern "C" __global__ void PadConstantFwdContiguous(
     for(int i = 0; i < 5; i++)
     {
         o[i] = o[i] - padding[2 * i];
-        flag *= (o[i] >= 0 && o[i] < x_dims[i]);
+        flag *= o[i] < x_dims[i];
     }
 
     // DTYPE val = flag ? GET_5D_VAL_AT(input, o[0], o[1], o[2], o[3], o[4]) : value;
diff --git a/src/solver/pad_constant/pad_constant_fwd_contiguous.cpp b/src/solver/pad_constant/pad_constant_fwd_contiguous.cpp
index 49de026dbf..01416f23d5 100644
--- a/src/solver/pad_constant/pad_constant_fwd_contiguous.cpp
+++ b/src/solver/pad_constant/pad_constant_fwd_contiguous.cpp
@@ -37,10 +37,11 @@ namespace miopen {
 namespace solver {
 namespace pad_constant_fwd_contiguous {
 bool PadConstantFwdContiguous::IsApplicable(
-    const ExecutionContext& context,
+    const ExecutionContext& /*context*/,
     const miopen::pad_constant_fwd_contiguous::ProblemDescription& problem) const
 {
-    if (!problem.IsSameType()) {
+    if(!problem.IsSameType())
+    {
         return false;
     }
@@ -48,18 +49,18 @@ bool PadConstantFwdContiguous::IsApplicable(
 }
 
 ConvSolution PadConstantFwdContiguous::GetSolution(
-    const ExecutionContext& context,
+    const ExecutionContext& /*context*/,
     const miopen::pad_constant_fwd_contiguous::ProblemDescription& problem) const
 {
     auto result = ConvSolution{miopenStatusSuccess};
-    auto ydims = problem.GetYDesc().GetLengths();
+    auto ydims  = problem.GetYDesc().GetLengths();
 
-    auto input_dtype = miopen::GetDataType(problem.GetXDesc().GetType());
+    auto input_dtype  = miopen::GetDataType(problem.GetXDesc().GetType());
     auto output_dtype = miopen::GetDataType(problem.GetYDesc().GetType());
 
     // for xgridsize: 5d -> 1d
     size_t output_size = 1;
-    for (int i = 0; i < 5; i++)
+    for(int i = 0; i < 5; i++)
     {
         output_size *= ydims[i] == 0 ? 1 : ydims[i];
     }
@@ -67,7 +68,8 @@ ConvSolution PadConstantFwdContiguous::GetSolution(
     size_t xlocalsize = 1024;
     // AlignUp blows up, because output_size can be > int_max. Lovely.
     // size_t xgridsize = AlignUp(output_size, xlocalsize);
-    size_t xgridsize = (((static_cast<size_t>(output_size) + xlocalsize - 1) / xlocalsize) * xlocalsize);
+    size_t xgridsize =
+        (((static_cast<size_t>(output_size) + xlocalsize - 1) / xlocalsize) * xlocalsize);
     size_t ylocalsize = 1;
     size_t ygridsize = 1;
@@ -75,10 +77,10 @@
     kernel.kernel_file = "MIOpenPadConstantFwd.cpp";
     kernel.kernel_name = "PadConstantFwdContiguous";
-    
+    // TODO: Actually understand how to use this properly
     const auto build_params = KernelBuildParameters{
-        {"INPUT_TYPE", input_dtype == "half" ? "half" : "float"},
+        {"INPUT_TYPE", input_dtype == "half" ? "half" : "float"},
         {"OUTPUT_TYPE", output_dtype == "half" ? "half" : "float"},
     };
@@ -105,7 +107,7 @@ ConvSolution PadConstantFwdContiguous::GetSolution(
             const size_t* d_xdims;
             hipMallocManaged(&d_xdims, xdims.size() * sizeof(size_t));
             memcpy((void*)d_xdims, xdims.data(), xdims.size() * sizeof(size_t));
-            
+
             const size_t* d_ydims;
             hipMallocManaged(&d_ydims, ydims.size() * sizeof(size_t));
             memcpy((void*)d_ydims, ydims.data(), ydims.size() * sizeof(size_t));
@@ -115,7 +117,13 @@ ConvSolution PadConstantFwdContiguous::GetSolution(
             for(unsigned long ydim : ydims)
                 output_size *= ydim;
-            kernel(params.x, params.y, d_xdims, d_ydims, params.padding, output_size, params.padding_value);
+            kernel(params.x,
+                   params.y,
+                   d_xdims,
+                   d_ydims,
+                   params.padding,
+                   output_size,
+                   params.padding_value);
             hipFree((void*)d_xdims);
             hipFree((void*)d_ydims);
         };
diff --git a/test/gtest/pad_constant.hpp b/test/gtest/pad_constant.hpp
index dc85299a55..31aafa9a7c 100644
--- a/test/gtest/pad_constant.hpp
+++ b/test/gtest/pad_constant.hpp
@@ -119,12 +119,12 @@ struct PadConstantTest : public ::testing::TestWithParam
         auto in_dims = pad_constant_config.GetInput();
         input = tensor{in_dims}.generate(gen_value);
         input_dev = handle.Write(input.data);
-        printf("Input tensor size is reported to be n=%lu c=%lu d=%lu h=%lu w=%lu\n",
-               in_dims[0],
-               in_dims[1],
-               in_dims[2],
-               in_dims[3],
-               in_dims[4]);
+        // printf("Input tensor size is reported to be n=%zu c=%zu d=%zu h=%zu w=%zu\n",
+        //        in_dims[0],
+        //        in_dims[1],
+        //        in_dims[2],
+        //        in_dims[3],
+        //        in_dims[4]);
 
         // Generate random padding
         for(size_t& i : padding)
@@ -137,12 +137,12 @@
         {
             out_dims.push_back(in_dims[i] + 2 * padding[2 * i]);
         }
-        printf("Output tensor size is reported to be n=%lu c=%lu d=%lu h=%lu w=%lu\n",
-               out_dims[0],
-               out_dims[1],
-               out_dims[2],
-               out_dims[3],
-               out_dims[4]);
+        // printf("Output tensor size is reported to be n=%zu c=%zu d=%zu h=%zu w=%zu\n",
+        //        out_dims[0],
+        //        out_dims[1],
+        //        out_dims[2],
+        //        out_dims[3],
+        //        out_dims[4]);
 
         output = tensor{out_dims};
         std::fill(output.begin(), output.end(), std::numeric_limits::quiet_NaN());
@@ -157,12 +157,12 @@
         auto&& handle = get_handle();
         auto out_dims = output.desc.GetLengths();
-        printf("Output tensor size is reported to be n=%lu c=%lu d=%lu h=%lu w=%lu\n",
-               out_dims[0],
-               out_dims[1],
-               out_dims[2],
-               out_dims[3],
-               out_dims[4]);
+        // printf("Output tensor size is reported to be n=%lu c=%lu d=%lu h=%lu w=%lu\n",
+        //        out_dims[0],
+        //        out_dims[1],
+        //        out_dims[2],
+        //        out_dims[3],
+        //        out_dims[4]);
 
         cpu_pad_constant_fwd(input.data.data(),
                              ref_output.data.data(),