Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tests for RNN seq API #2493

Merged
merged 34 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
aaa8152
test base
shurale-nkn Sep 4, 2023
63ec4f3
base test code and seq_tensor holder class
shurale-nkn Oct 11, 2023
d22b252
add ability to use empty container as valid result in tests
shurale-nkn Oct 11, 2023
8c9a1ab
universal_ref base
shurale-nkn Oct 16, 2023
68469a2
any layout RNN training
shurale-nkn Oct 22, 2023
7050f82
full lstm testing
shurale-nkn Oct 30, 2023
c075c0d
remove memory leaks from cpu verification related to dropout usage
shurale-nkn Oct 30, 2023
b1e9174
combining RNN verification into one file
shurale-nkn Oct 31, 2023
af730b7
cpu verification file rename
shurale-nkn Oct 31, 2023
233cebd
remove memory leaks from cpu verification related to dropout usage in…
shurale-nkn Oct 31, 2023
7ee564e
small changes
shurale-nkn Oct 31, 2023
a34361c
update for configuration checks
shurale-nkn Oct 31, 2023
7eaaf54
RNN kernel FIX: failed at creating a zero-size tensor
shurale-nkn Oct 31, 2023
3f921ca
RNN kernel FIX: failed when the output buffer was dirty.
shurale-nkn Oct 31, 2023
842f81f
FIX RNNTensorBaseLayoutConverter: avoid zero size copy
shurale-nkn Oct 31, 2023
d5d6553
FIX SeqTensorDescriptor: a wider range of values
shurale-nkn Oct 31, 2023
b2aa06e
fix clang-format issues
junliume Oct 31, 2023
4a5b083
Merge remote-tracking branch 'origin/develop' into rnn_tests_2
shurale-nkn Oct 31, 2023
bda6229
change RAN_GEN to prng::gen_A_to_B
shurale-nkn Oct 31, 2023
5f83685
missing include "cpu_rnn.hpp"
shurale-nkn Nov 1, 2023
c0654c7
variable maybe_unused in release build
shurale-nkn Nov 1, 2023
3ff724f
tidy fix and remove of unused code
shurale-nkn Nov 1, 2023
34c1094
value range fix
shurale-nkn Nov 2, 2023
ef84a11
kernel fix: hidden_state batch bigger than input batch
shurale-nkn Nov 7, 2023
ec77201
tidy
shurale-nkn Nov 7, 2023
84da645
Revert "tidy"
shurale-nkn Nov 7, 2023
e7486d3
inc tolerance
shurale-nkn Nov 7, 2023
289fa4d
Merge remote-tracking branch 'origin/develop' into rnn_tests_2
shurale-nkn Nov 7, 2023
4429387
Merge remote-tracking branch 'origin/develop' into rnn_tests_2
shurale-nkn Nov 10, 2023
3782fb1
Merge branch 'develop' into rnn_tests
shurale-nkn Nov 12, 2023
f9896f3
Merge remote-tracking branch 'origin/develop' into rnn_tests_2
shurale-nkn Nov 29, 2023
a1c9650
addressed review comments
shurale-nkn Nov 29, 2023
3cd6227
merge remote-tracking branch 'origin/rnn_tests' into rnn_tests_2
shurale-nkn Nov 29, 2023
baacc0f
Merge branch 'develop' into rnn_tests
shurale-nkn Dec 1, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion driver/rnn_seq_driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,7 @@ int RNNSeqDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
}

// Unless seed is persistent between runs validation using cache stored in file is impossible.
srand(0);
prng::reset_seed();

auto fill_array_via_gen = [](auto& dst, size_t dst_sz, double range_l, double range_r) {
for(size_t it = 0; it < dst_sz; it++)
Expand Down
180 changes: 101 additions & 79 deletions src/ocl/rnnocl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,26 @@ void RNNDescriptor::RNNForwardTraining_MS(Handle& handle,
const std::vector<size_t> hcy_dst_stride{
static_cast<size_t>(hidden_size * max_batch), static_cast<size_t>(hidden_size), 1};

if(in_n.at(0) < max_batch)
{
float beta = 0.;
const std::vector<size_t> zero_set_size{1,
static_cast<size_t>(max_batch - in_n.at(0)),
static_cast<size_t>(hidden_size)};
auto set_batch_offset = in_n.at(0) * hidden_size;

auto set_desc =
miopen::TensorDescriptor(wDesc.GetType(), zero_set_size, hcy_dst_stride);
if(hy != nullptr)
{
SetTensor(handle, set_desc, hy, &beta, hcy_layer_offset + set_batch_offset);
}
if(cy != nullptr)
{
SetTensor(handle, set_desc, cy, &beta, hcy_layer_offset + set_batch_offset);
}
}

for(int time_i = seq_len - 1; time_i >= 0; time_i--)
{
auto copy_batch = (time_i == seq_len - 1) ? in_n.at(time_i)
Expand Down Expand Up @@ -2879,86 +2899,89 @@ void RNNDescriptor::RNNForwardTrainingPackedTensors(
}
else
{
sp_size[1] = batch_n - in_n.at(0);
sp_size[2] = wei_len;
sp_desc = miopen::TensorDescriptor(wDesc.GetType(), sp_size, sp_stride);
w_size[1] = 1;
w_size[2] = wei_len;
w_desc = miopen::TensorDescriptor(wDesc.GetType(), w_size, w_stride);
if(batch_n - in_n.at(0) > 0)
{
sp_size[1] = batch_n - in_n.at(0);
sp_size[2] = wei_len;
sp_desc = miopen::TensorDescriptor(wDesc.GetType(), sp_size, sp_stride);
w_size[1] = 1;
w_size[2] = wei_len;
w_desc = miopen::TensorDescriptor(wDesc.GetType(), w_size, w_stride);

OpTensor(handle,
miopenTensorOpAdd,
&alpha0,
sp_desc,
reserveSpace,
&alpha1,
w_desc,
w,
&beta_t,
sp_desc,
reserveSpace,
hid_shift + in_n.at(0) * hy_stride,
wei_shift_bias_temp,
hid_shift + in_n.at(0) * hy_stride,
true);
// Update time
profileRNNkernels(handle, 1, ctime);
OpTensor(handle,
miopenTensorOpAdd,
&alpha0,
sp_desc,
reserveSpace,
&alpha1,
w_desc,
w,
&beta_t,
sp_desc,
reserveSpace,
hid_shift + in_n.at(0) * hy_stride,
wei_shift_bias_temp,
hid_shift + in_n.at(0) * hy_stride,
true);
// Update time
profileRNNkernels(handle, 1, ctime);

if(dirMode != 0u)
{
if(in_n.at(0) == in_n.at(seqLen - 1))
{
OpTensor(handle,
miopenTensorOpAdd,
&alpha0,
sp_desc,
reserveSpace,
&alpha1,
w_desc,
w,
&beta_t,
sp_desc,
reserveSpace,
hid_shift + wei_len,
wei_shift_bias_temp + wei_len,
hid_shift + wei_len,
true);
// Update time
profileRNNkernels(handle, 1, ctime);
}
else
if(dirMode != 0u)
{
int cur_batch = 0;
for(int ti = 0; ti < seqLen; ti++)
if(in_n.at(0) == in_n.at(seqLen - 1))
{
if(ti != (seqLen - 1))
OpTensor(handle,
miopenTensorOpAdd,
&alpha0,
sp_desc,
reserveSpace,
&alpha1,
w_desc,
w,
&beta_t,
sp_desc,
reserveSpace,
hid_shift + wei_len,
wei_shift_bias_temp + wei_len,
hid_shift + wei_len,
true);
// Update time
profileRNNkernels(handle, 1, ctime);
}
else
{
int cur_batch = 0;
for(int ti = 0; ti < seqLen; ti++)
{
offset = hid_shift + cur_batch * hy_stride;
if(ti != (seqLen - 1))
{
offset = hid_shift + cur_batch * hy_stride;

sp_size[1] = in_n.at(ti + 1);
sp_size[2] = wei_len;
sp_desc =
miopen::TensorDescriptor(wDesc.GetType(), sp_size, sp_stride);
sp_size[1] = in_n.at(ti + 1);
sp_size[2] = wei_len;
sp_desc = miopen::TensorDescriptor(
wDesc.GetType(), sp_size, sp_stride);

OpTensor(handle,
miopenTensorOpAdd,
&alpha0,
sp_desc,
reserveSpace,
&alpha1,
w_desc,
w,
&beta_t,
sp_desc,
reserveSpace,
static_cast<int>(offset) + wei_len,
wei_shift_bias_temp + wei_len,
static_cast<int>(offset) + wei_len,
true);
// Update time
profileRNNkernels(handle, 1, ctime);
OpTensor(handle,
miopenTensorOpAdd,
&alpha0,
sp_desc,
reserveSpace,
&alpha1,
w_desc,
w,
&beta_t,
sp_desc,
reserveSpace,
static_cast<int>(offset) + wei_len,
wei_shift_bias_temp + wei_len,
static_cast<int>(offset) + wei_len,
true);
// Update time
profileRNNkernels(handle, 1, ctime);
}
cur_batch += in_n.at(ti);
}
cur_batch += in_n.at(ti);
}
}
}
Expand Down Expand Up @@ -5374,18 +5397,17 @@ void RNNDescriptor::RNNBackwardDataPackedTensors(
// dinput
if(inputMode == miopenRNNskip)
{
sp_size[1] = batch_n;
sp_size[2] = hy_h;
x_size[1] = batch_n;
x_size[2] = hy_h;
x_desc = miopen::TensorDescriptor(rnn_data_type, x_size, x_stride);
sp_desc = miopen::TensorDescriptor(rnn_data_type, sp_size, sp_stride);
const std::vector<int> dx_size{1, batch_n, hy_h};
x_desc = miopen::TensorDescriptor(rnn_data_type, dx_size, x_stride);
sp_desc = miopen::TensorDescriptor(rnn_data_type, dx_size, sp_stride);

alpha0 = 1;
alpha1 = 1;
beta_t = 0;

for(int gi = 0; gi < nHiddenTensorsPerLayer * bi; gi++)
CopyTensor(handle, sp_desc, workSpace, x_desc, dx, 0, 0, true);
profileRNNkernels(handle, 1, ctime);
for(int gi = 1; gi < nHiddenTensorsPerLayer * bi; gi++)
{
OpTensor(handle,
miopenTensorOpAdd,
Expand Down
4 changes: 4 additions & 0 deletions src/rnn/rnn_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,10 @@ void RNNTensorBaseLayoutConverter::ChangeTensorGPUDataPadding(
const std::vector<size_t> packed_stride =
get_packed_stride(copy_size, tensor_desc.GetLayoutVector());

// Nothing to copy, avoiding error with zero lens in TensorDescriptor
if(!std::all_of(copy_size.cbegin(), copy_size.cend(), [](size_t x) { return x > 0; }))
continue;
shurale-nkn marked this conversation as resolved.
Show resolved Hide resolved

const auto packed_desc =
miopen::TensorDescriptor(tensor_desc.GetType(), copy_size, packed_stride);
const auto padded_desc =
Expand Down
31 changes: 20 additions & 11 deletions src/seq_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t,
: SeqTensorDescriptor(t,
layout_in,
ConvertLengthsOrThrow(lens_in, "Lengths must be > 0"),
ConvertLengthsOrThrow(seq_len, "SequenceLengths must be >= 0"),
ConvertLengthsOrThrow(seq_len, "SequenceLengths must be >= 0", true),
{},
padding_marker_in,
use_seq_len,
Expand Down Expand Up @@ -429,22 +429,31 @@ std::vector<size_t> SeqTensorDescriptor::GetBatchesPerSequence() const
}
else
{
batches.reserve(sequence_len[0]);
auto block_begin = sequence_len.rbegin();
auto sample_ptr = sequence_len.rbegin();
auto batch_size = sequence_len.size();

batches.insert(batches.end(), *block_begin, batch_size);
while(block_begin != sequence_len.rend() && *block_begin == 0)
++block_begin;

while(sample_ptr != sequence_len.rend())
if(block_begin != sequence_len.rend())
{
if(*sample_ptr != *block_begin)
auto sample_ptr = block_begin;
auto batch_size = sequence_len.rend() - block_begin;

batches.insert(batches.end(), *block_begin, batch_size);

while(sample_ptr != sequence_len.rend())
{
batch_size = batch_size - (sample_ptr - block_begin);
const auto seq_count = *sample_ptr - *block_begin;
batches.insert(batches.end(), seq_count, batch_size);
block_begin = sample_ptr;
if(*sample_ptr != *block_begin)
{
batch_size = batch_size - (sample_ptr - block_begin);
const auto seq_count = *sample_ptr - *block_begin;
batches.insert(batches.end(), seq_count, batch_size);

block_begin = sample_ptr;
}
sample_ptr++;
}
sample_ptr++;
}
}
return batches;
Expand Down
Loading