Skip to content

Commit

Permalink
Merge pull request #289 from gmerlo/7d
Browse files Browse the repository at this point in the history
7d
  • Loading branch information
bd4 authored Sep 18, 2024
2 parents f7a018f + 18c8db3 commit bd980f5
Show file tree
Hide file tree
Showing 6 changed files with 288 additions and 1 deletion.
69 changes: 68 additions & 1 deletion include/gtensor/assign.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,31 @@ struct assigner<6, space::host>
}
};

template <>
struct assigner<7, space::host>
{
  // Element-wise copy of a 7-d expression `rhs` into `lhs` on the host,
  // iterated with dimension 0 innermost. `stream` is unused on the host.
  template <typename E1, typename E2>
  static void run(E1& lhs, const E2& rhs, stream_view stream)
  {
    // Hoist the extents once instead of querying lhs.shape() per iteration.
    const int ni = lhs.shape(0);
    const int nj = lhs.shape(1);
    const int nk = lhs.shape(2);
    const int nl = lhs.shape(3);
    const int nm = lhs.shape(4);
    const int nn = lhs.shape(5);
    const int no = lhs.shape(6);

    for (int o = 0; o != no; ++o)
      for (int n = 0; n != nn; ++n)
        for (int m = 0; m != nm; ++m)
          for (int l = 0; l != nl; ++l)
            for (int k = 0; k != nk; ++k)
              for (int j = 0; j != nj; ++j)
                for (int i = 0; i != ni; ++i)
                  lhs(i, j, k, l, m, n, o) = rhs(i, j, k, l, m, n, o);
  }
};

#if defined(GTENSOR_DEVICE_CUDA) || defined(GTENSOR_DEVICE_HIP)

#ifdef GTENSOR_PER_DIM_KERNELS
Expand Down Expand Up @@ -219,6 +244,27 @@ __global__ void kernel_assign_6(Elhs lhs, Erhs _rhs)
}
}

template <typename Elhs, typename Erhs>
__global__ void kernel_assign_7(Elhs lhs, Erhs _rhs)
{
  // Take a by-value copy so the kernel owns the rhs expression object.
  auto rhs = _rhs;
  int tidx = threadIdx.x + blockIdx.x * blockDim.x;
  int tidy = threadIdx.y + blockIdx.y * blockDim.y;
  int tidz = blockIdx.z;
  if (tidx < lhs.shape(0) * lhs.shape(1) &&
      tidy < lhs.shape(2) * lhs.shape(3)) {
    // x covers dims 0+1, y covers dims 2+3.
    int j = tidx / lhs.shape(0);
    int i = tidx % lhs.shape(0);
    int l = tidy / lhs.shape(2);
    int k = tidy % lhs.shape(2);
    // blockIdx.z linearizes dims 4-6 with extent shape(4)*shape(5)*shape(6)
    // (see the launch in assigner<7, space::device>), i.e.
    // tidz = m + n * shape(4) + o * shape(4) * shape(5).
    int o = tidz / (lhs.shape(4) * lhs.shape(5));
    int rem = tidz % (lhs.shape(4) * lhs.shape(5));
    int n = rem / lhs.shape(4);
    int m = rem % lhs.shape(4);

    // BUG FIX 1: original decomposed tidz with `tidz % shape(5)` /
    // `tidz / shape(5)`, which mis-maps indices whenever shape(4) != shape(5)
    // or shape(6) > 1.
    // BUG FIX 2: original rhs call passed only six indices
    // (i, j, k, l, m, o) — `n` was dropped.
    lhs(i, j, k, l, m, n, o) = rhs(i, j, k, l, m, n, o);
  }
}

template <>
struct assigner<1, space::device>
{
Expand Down Expand Up @@ -305,7 +351,7 @@ struct assigner<5, space::device>
template <typename E1, typename E2>
static void run(E1& lhs, const E2& rhs, stream_view stream)
{
// printf("assigner<6, device>\n");
// printf("assigner<5, device>\n");
dim3 numThreads(BS_X, BS_Y);
dim3 numBlocks((lhs.shape(0) * lhs.shape(1) + BS_X - 1) / BS_X,
(lhs.shape(2) * lhs.shape(3) + BS_Y - 1) / BS_Y,
Expand Down Expand Up @@ -341,6 +387,27 @@ struct assigner<6, space::device>
}
};

// 7-d device-side assignment: dims 0+1 are flattened onto the x
// thread/block dimension, dims 2+3 onto y, and dims 4-6 are linearized
// into blockIdx.z; kernel_assign_7 decomposes them back.
template <>
struct assigner<7, space::device>
{
  template <typename E1, typename E2>
  static void run(E1& lhs, const E2& rhs, stream_view stream)
  {
    // printf("assigner<7, device>\n");
    dim3 numThreads(BS_X, BS_Y);
    // Round x/y block counts up so partial tiles are covered; the kernel
    // bounds-checks tidx/tidy against the flattened extents.
    dim3 numBlocks((lhs.shape(0) * lhs.shape(1) + BS_X - 1) / BS_X,
                   (lhs.shape(2) * lhs.shape(3) + BS_Y - 1) / BS_Y,
                   lhs.shape(4) * lhs.shape(5) * lhs.shape(6));

    gpuSyncIfEnabledStream(stream);
    // std::cout << "rhs " << typeid(rhs.to_kernel()).name() << "\n";
    gtLaunchKernel(kernel_assign_7, numBlocks, numThreads, 0,
                   stream.get_backend_stream(), lhs.to_kernel(),
                   rhs.to_kernel());
    gpuSyncIfEnabledStream(stream);
  }
};

#else // not defined GTENSOR_PER_DIM_KERNELS

template <typename Elhs, typename Erhs, size_type N>
Expand Down
6 changes: 6 additions & 0 deletions include/gtensor/expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,12 @@ GT_INLINE decltype(auto) index_expression(E&& expr, shape_type<6> idx)
return expr(idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]);
}

// Index a 7-d expression with a shape_type<7> index tuple by unpacking
// it into a 7-argument operator() call; preserves the expression's
// reference category via decltype(auto).
template <typename E>
GT_INLINE decltype(auto) index_expression(E&& expr, shape_type<7> idx)
{
  return expr(idx[0], idx[1], idx[2], idx[3], idx[4], idx[5], idx[6]);
}

} // namespace gt

#endif
60 changes: 60 additions & 0 deletions include/gtensor/gtensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,27 @@ __global__ void kernel_launch(gt::shape_type<6> shape, F f)
}
}

// 7-d launch kernel: dims 0/1 come from the x/y thread grid, dims 2-6 are
// linearized into blockIdx.z with dim 2 fastest:
//   blockIdx.z = k + l*s2 + m*s2*s3 + n*s2*s3*s4 + o*s2*s3*s4*s5
template <typename F>
__global__ void kernel_launch(gt::shape_type<7> shape, F f)
{
  int i = threadIdx.x + blockIdx.x * BS_X;
  int j = threadIdx.y + blockIdx.y * BS_Y;
  int b = blockIdx.z;
  int o = b / (shape[2] * shape[3] * shape[4] * shape[5]);
  // BUG FIX: original read `hspae[5]` here (typo for `shape[5]`), which
  // does not compile.
  b -= o * (shape[2] * shape[3] * shape[4] * shape[5]);
  int n = b / (shape[2] * shape[3] * shape[4]);
  b -= n * (shape[2] * shape[3] * shape[4]);
  int m = b / (shape[2] * shape[3]);
  b -= m * (shape[2] * shape[3]);
  int l = b / shape[2];
  b -= l * shape[2];
  int k = b;

  // Guard against the rounded-up x/y grid overrunning the extents.
  if (i < shape[0] && j < shape[1]) {
    f(i, j, k, l, m, n, o);
  }
}

#else // not GTENSOR_PER_DIM_KERNELS

template <typename F, size_type N>
Expand Down Expand Up @@ -428,6 +449,30 @@ struct launch<6, space::host>
}
};

template <>
struct launch<7, space::host>
{
  // Host fallback for a 7-d launch: invoke f for every index tuple in
  // `shape`, dimension 0 innermost. `stream` is unused on the host.
  template <typename F>
  static void run(const gt::shape_type<7>& shape, F&& f, gt::stream_view stream)
  {
    for (int o = 0; o < shape[6]; o++) {
      for (int n = 0; n < shape[5]; n++) {
        for (int m = 0; m < shape[4]; m++) {
          for (int l = 0; l < shape[3]; l++) {
            for (int k = 0; k < shape[2]; k++) {
              for (int j = 0; j < shape[1]; j++) {
                for (int i = 0; i < shape[0]; i++) {
                  // FIX: do not std::forward inside the loop — forwarding
                  // casts f to an rvalue on every iteration, which is
                  // incorrect for callables whose invocation may consume
                  // state; a plain call is safe for all F.
                  f(i, j, k, l, m, n, o);
                }
              }
            }
          }
        }
      }
    }
  }
};

#if defined(GTENSOR_DEVICE_CUDA) || defined(GTENSOR_DEVICE_HIP)

#ifdef GTENSOR_PER_DIM_KERNELS
Expand Down Expand Up @@ -529,6 +574,21 @@ struct launch<6, space::device>
}
};

template <>
struct launch<7, space::device>
{
  // Device 7-d launch: dims 0/1 map to the x/y thread grid (rounded up to
  // whole blocks; the kernel bounds-checks), dims 2-6 are linearized into
  // blockIdx.z and decomposed in kernel_launch(gt::shape_type<7>, F).
  template <typename F>
  static void run(const gt::shape_type<7>& shape, F&& f, gt::stream_view stream)
  {
    dim3 numThreads(BS_X, BS_Y);
    dim3 numBlocks((shape[0] + BS_X - 1) / BS_X, (shape[1] + BS_Y - 1) / BS_Y,
                   shape[2] * shape[3] * shape[4] * shape[5] * shape[6]);

    gtLaunchKernel(kernel_launch, numBlocks, numThreads, 0,
                   stream.get_backend_stream(), shape, std::forward<F>(f));
  }
};

#else // not GTENSOR_PER_DIM_KERNELS

template <int N>
Expand Down
31 changes: 31 additions & 0 deletions include/gtensor/operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,37 @@ struct equals<6, 6, space::host, space::host>
}
};

template <>
struct equals<7, 7, space::host, space::host>
{
  // Deep equality for two 7-d host expressions: shapes must match and
  // every element must compare equal. Returns early on first mismatch.
  // Loop indices follow the i..o convention used by the 7-d assigners.
  template <typename E1, typename E2>
  static bool run(const E1& e1, const E2& e2)
  {
    if (e1.shape() != e2.shape()) {
      return false;
    }

    for (int o = 0; o < e1.shape(6); o++)
      for (int n = 0; n < e1.shape(5); n++)
        for (int m = 0; m < e1.shape(4); m++)
          for (int l = 0; l < e1.shape(3); l++)
            for (int k = 0; k < e1.shape(2); k++)
              for (int j = 0; j < e1.shape(1); j++)
                for (int i = 0; i < e1.shape(0); i++)
                  if (e1(i, j, k, l, m, n, o) != e2(i, j, k, l, m, n, o))
                    return false;

    return true;
  }
};

#ifdef GTENSOR_HAVE_DEVICE

template <size_type N1, size_type N2>
Expand Down
75 changes: 75 additions & 0 deletions tests/test_assign.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ TEST(assign, gtensor_6d)
EXPECT_EQ(a, b);
}

TEST(assign, gtensor_7d)
{
  gt::gtensor<int, 7> a(gt::shape(2, 3, 4, 5, 6, 7, 8));
  gt::gtensor<int, 7> b(a.shape());

  // Fill a with distinct values through its flat data pointer.
  int value = 0;
  for (auto* p = a.data(); p != a.data() + a.size(); ++p) {
    *p = value++;
  }

  EXPECT_NE(a, b);
  b = a;
  EXPECT_EQ(a, b);
}

TEST(assign, gview_1d_scalar)
{
auto a = gt::empty<int>(gt::shape(5));
Expand Down Expand Up @@ -81,6 +97,23 @@ TEST(assign, broadcast_6d)
}
}

TEST(assign, broadcast_7d)
{
  gt::gtensor<int, 7> a(gt::shape(8, 1, 2, 4, 1, 1, 6), 0);
  gt::gtensor<int, 7> b(gt::shape(8, 1, 2, 1, 1, 1, 6), -7);

  gt::assign(a, b);

  // b has extent 1 in dim 3, so it broadcasts across a's dim 3; every
  // element of a must now hold b's fill value.
  for (int i0 = 0; i0 < a.shape(0); i0++)
    for (int i2 = 0; i2 < a.shape(2); i2++)
      for (int i3 = 0; i3 < a.shape(3); i3++)
        for (int i6 = 0; i6 < a.shape(6); i6++)
          EXPECT_EQ(a(i0, 0, i2, i3, 0, 0, i6), -7);
}
#ifdef GTENSOR_HAVE_DEVICE

TEST(assign, device_gtensor_6d)
Expand All @@ -103,6 +136,26 @@ TEST(assign, device_gtensor_6d)
EXPECT_EQ(h_a, h_b);
}

TEST(assign, device_gtensor_7d)
{
  gt::gtensor_device<int, 7> a(gt::shape(2, 3, 4, 5, 6, 7, 8));
  gt::gtensor_device<int, 7> b(a.shape());
  gt::gtensor<int, 7> h_a(a.shape());
  gt::gtensor<int, 7> h_b(a.shape());

  // Fill the host source with distinct values.
  int value = 0;
  for (auto* p = h_a.data(); p != h_a.data() + h_a.size(); ++p) {
    *p = value++;
  }

  gt::copy(h_a, a);
  b = a; // device-side 7-d assignment under test
  gt::copy(b, h_b);

  EXPECT_EQ(h_a, h_b);
}

TEST(assign, device_gtensor_fill)
{
gt::gtensor_device<float, 2> a(gt::shape(2, 3));
Expand Down Expand Up @@ -403,4 +456,26 @@ TEST(assign, device_broadcast_6d)
}
}

TEST(assign, device_broadcast_7d)
{
  gt::gtensor_device<int, 7> a(gt::shape(8, 1, 2, 4, 1, 1, 4), 0);
  gt::gtensor_device<int, 7> b(gt::shape(8, 1, 2, 1, 1, 1, 4), -7);

  gt::gtensor<int, 7> h_a(a.shape());

  gt::assign(a, b);

  gt::copy(a, h_a);

  // b has extent 1 in dim 3, so it broadcasts across a's dim 3; every
  // element of a must now hold b's fill value.
  for (int i0 = 0; i0 < h_a.shape(0); i0++)
    for (int i2 = 0; i2 < h_a.shape(2); i2++)
      for (int i3 = 0; i3 < h_a.shape(3); i3++)
        for (int i6 = 0; i6 < h_a.shape(6); i6++)
          EXPECT_EQ(h_a(i0, 0, i2, i3, 0, 0, i6), -7);
}

#endif // GTENSOR_HAVE_DEVICE
48 changes: 48 additions & 0 deletions tests/test_launch.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,22 @@ void device_double_add_6d(gt::gtensor_device<double, 6>& a,
gt::copy(b, out);
}

// Compute out = a + a element-wise on the device via a 7-d gt::launch,
// then copy the result back to the host tensor `out`.
void device_double_add_7d(gt::gtensor_device<double, 7>& a,
                          gt::gtensor<double, 7>& out)
{
  auto b = gt::empty_like(a);

  auto k_a = a.to_kernel();
  auto k_b = b.to_kernel();

  gt::launch<7>(
    a.shape(), GT_LAMBDA(int i, int j, int k, int l, int m, int n, int o) {
      auto v = k_a(i, j, k, l, m, n, o);
      k_b(i, j, k, l, m, n, o) = v + v;
    });
  gt::copy(b, out);
}

TEST(gtensor, device_launch_5d)
{
gt::gtensor<double, 5> h_a(gt::shape(2, 2, 2, 2, 2));
Expand Down Expand Up @@ -242,4 +258,36 @@ TEST(gtensor, device_launch_6d)
EXPECT_EQ(h_b, h_b_expected);
}

TEST(gtensor, device_launch_7d)
{
  gt::gtensor<double, 7> h_a(gt::shape(2, 2, 2, 2, 2, 2, 2));
  gt::gtensor_device<double, 7> a(h_a.shape());
  gt::gtensor<double, 7> h_b(h_a.shape());
  gt::gtensor<double, 7> h_b_expected(h_a.shape());

  // Seed each element with the sum of its indices.
  for (int i0 = 0; i0 < h_a.shape(0); i0++)
    for (int i1 = 0; i1 < h_a.shape(1); i1++)
      for (int i2 = 0; i2 < h_a.shape(2); i2++)
        for (int i3 = 0; i3 < h_a.shape(3); i3++)
          for (int i4 = 0; i4 < h_a.shape(4); i4++)
            for (int i5 = 0; i5 < h_a.shape(5); i5++)
              for (int i6 = 0; i6 < h_a.shape(6); i6++)
                h_a(i0, i1, i2, i3, i4, i5, i6) =
                  i0 + i1 + i2 + i3 + i4 + i5 + i6;

  h_b_expected = 2 * h_a;

  gt::copy(h_a, a);

  device_double_add_7d(a, h_b);

  EXPECT_EQ(h_b, h_b_expected);
}

#endif // GTENSOR_HAVE_DEVICE

0 comments on commit bd980f5

Please sign in to comment.