Skip to content

Commit

Permalink
fix: 5dpos should be y_dim. oops
Browse files Browse the repository at this point in the history
  • Loading branch information
o2buzzle committed Apr 15, 2024
1 parent 7d413c4 commit 71b568b
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 21 deletions.
44 changes: 26 additions & 18 deletions driver/constant_pad_driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,21 @@ T get5DValueAt(const T* x, const size_t* x_dims, size_t n, size_t c, size_t d, s
}

template <typename Tgpu, typename Tcheck>
int32_t mloConstantPadForwardRunHost(miopenTensorDescriptor_t inputDesc,
miopenTensorDescriptor_t outputDesc,
Tgpu* input,
Tcheck* output_host,
const size_t* padding,
float value)
void mloConstantPadForwardRunHost(miopenTensorDescriptor_t inputDesc,
miopenTensorDescriptor_t outputDesc,
Tgpu* input,
Tcheck* output_host,
const size_t* padding,
float value)
{
size_t o[5];
auto input_dims = miopen::deref(inputDesc).GetLengths();
auto output_dims = miopen::deref(outputDesc).GetLengths();

size_t output_size = output_dims[0] * output_dims[1] * output_dims[2] * output_dims[3] * output_dims[4];
auto input_strides = miopen::deref(inputDesc).GetStrides();

size_t output_size =
output_dims[0] * output_dims[1] * output_dims[2] * output_dims[3] * output_dims[4];

for(size_t gid = 0; gid < output_size; ++gid)
{
Expand All @@ -87,7 +89,7 @@ int32_t mloConstantPadForwardRunHost(miopenTensorDescriptor_t inputDesc,
for(int i = 0; i < 5; i++)
{
o[i] = o[i] - padding[2 * i];
flag *= (o[i] >= 0 && o[i] < input_dims[i]);
flag *= (o[i] < input_dims[i]);
}

if(flag)
Expand All @@ -99,7 +101,6 @@ int32_t mloConstantPadForwardRunHost(miopenTensorDescriptor_t inputDesc,
output_host[gid] = value;
}
}
return 0;
}

template <typename Tgpu, typename Tref>
Expand Down Expand Up @@ -260,11 +261,11 @@ template <typename Tgpu, typename Tref>
std::vector<size_t> ConstantPadDriver<Tgpu, Tref>::GetPaddingsFromCmdLine()
{
std::vector<size_t> paddings = std::vector<size_t>(10);
paddings[0] = inflags.GetValueInt("pad_n");
paddings[2] = inflags.GetValueInt("pad_c");
paddings[4] = inflags.GetValueInt("pad_d");
paddings[6] = inflags.GetValueInt("pad_h");
paddings[8] = inflags.GetValueInt("pad_w");
paddings[0] = inflags.GetValueInt("pad_n");
paddings[2] = inflags.GetValueInt("pad_c");
paddings[4] = inflags.GetValueInt("pad_d");
paddings[6] = inflags.GetValueInt("pad_h");
paddings[8] = inflags.GetValueInt("pad_w");
return paddings;
}

Expand All @@ -276,7 +277,7 @@ int ConstantPadDriver<Tgpu, Tref>::AllocateBuffersAndCopy()

in_dev = std::unique_ptr<GPUMem>(new GPUMem(0, input_size, sizeof(Tgpu)));
out_dev = std::unique_ptr<GPUMem>(new GPUMem(0, output_size, sizeof(Tgpu)));
padding_dev = std::make_unique<GPUMem>(0, 10, sizeof(size_t));
padding_dev = std::unique_ptr<GPUMem>(new GPUMem(0, 10, sizeof(size_t)));

input = std::vector<Tgpu>(input_size, static_cast<Tgpu>(0));
output = std::vector<Tgpu>(output_size, static_cast<Tgpu>(0));
Expand Down Expand Up @@ -349,7 +350,7 @@ int ConstantPadDriver<Tgpu, Tref>::RunForwardGPU()
template <typename Tgpu, typename Tref>
int ConstantPadDriver<Tgpu, Tref>::RunForwardCPU()
{
mloConstantPadForwardRunHost<Tgpu, Tref>(
mloConstantPadForwardRunHost(
inputDesc, outputDesc, input.data(), output_host.data(), padding.data(), value);
return miopenStatusSuccess;
}
Expand All @@ -359,17 +360,24 @@ int ConstantPadDriver<Tgpu, Tref>::VerifyForward()
{
RunForwardCPU();

bool failed = false;

for(int i = 0; i < output.size(); i++)
{
if(output[i] != output_host[i])
{
std::cout << "ConstantPadDriver: Forward verification failed." << std::endl;
std::cout << "output[" << i << "] = " << output[i] << " != "
<< "output_host[" << i << "] = " << output_host[i] << std::endl;
return miopenStatusNotInitialized;
return -1;
}
}

if(failed)
{
std::cout << "ConstantPadDriver: Forward verification failed." << std::endl;
return -1;
}

std::cout << "ConstantPadDriver: Forward verification passed." << std::endl;
return miopenStatusSuccess;
}
Expand Down
3 changes: 2 additions & 1 deletion src/kernels/MIOpenPadConstantFwd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ extern "C" __global__ void PadConstantFwdContiguous(
const INPUT_TYPE* __restrict__ x,
OUTPUT_TYPE* __restrict__ y,
const size_t* __restrict__ x_dims,
const size_t* __restrict__ y_dims,
const size_t* __restrict__ padding,
const size_t output_size,
float value)
Expand All @@ -71,7 +72,7 @@ extern "C" __global__ void PadConstantFwdContiguous(
size_t o[5];

// GET_NCDHW(o[0], o[1], o[2], o[3], o[4], gid, output);
GET_NCDHW(o[0], o[1], o[2], o[3], o[4], gid, x_dims);
GET_NCDHW(o[0], o[1], o[2], o[3], o[4], gid, y_dims);

// bool flag = true;
bool flag = true;
Expand Down
7 changes: 6 additions & 1 deletion src/solver/pad_constant_fwd/pad_constant_fwd_contiguous.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,19 @@ ConvSolution PadConstantFwdContiguous::GetSolution(
const size_t* d_xdims;
hipMalloc(&d_xdims, xdims.size() * sizeof(size_t));
hipMemcpy((void*)d_xdims, xdims.data(), xdims.size() * sizeof(size_t), hipMemcpyHostToDevice);

const size_t* d_ydims;
hipMalloc(&d_ydims, ydims.size() * sizeof(size_t));
hipMemcpy((void*)d_ydims, ydims.data(), ydims.size() * sizeof(size_t), hipMemcpyHostToDevice);

// Calculate output size (again)
size_t output_size = 1;
for(unsigned long ydim : ydims)
output_size *= ydim;

kernel(params.x, params.y, d_xdims, params.padding, output_size, params.padding_value);
kernel(params.x, params.y, d_xdims, d_ydims, params.padding, output_size, params.padding_value);
hipFree((void*)d_xdims);
hipFree((void*)d_ydims);
};
};

Expand Down
3 changes: 2 additions & 1 deletion test/gtest/pad_constant.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "verify.hpp"
#include "cpu_pad_constant.hpp"
#include <cstdio>
#include <cstdlib>
#include <gtest/gtest.h>
#include <miopen/miopen.h>
#include <miopen/constant_pad.hpp>
Expand Down Expand Up @@ -130,7 +131,7 @@ struct PadConstantTest : public ::testing::TestWithParam<PadConstantTestCase>
// Generate random padding
for(unsigned long & i : padding)
{
i = prng::gen_descreet_uniform_sign<size_t>(0, 10);
i = rand() % 10;
}

std::vector<size_t> out_dims;
Expand Down

0 comments on commit 71b568b

Please sign in to comment.