Skip to content

Commit

Permalink
Merge pull request #289 from gmerlo/7d
Browse files Browse the repository at this point in the history
7d
  • Loading branch information
bd4 authored Sep 18, 2024
2 parents f7a018f + 18c8db3 commit bd980f5
Show file tree
Hide file tree
Showing 6 changed files with 288 additions and 1 deletion.
69 changes: 68 additions & 1 deletion include/gtensor/assign.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,31 @@ struct assigner<6, space::host>
}
};

template <>
struct assigner<7, space::host>
{
  // Element-wise copy of a 7-d expression `rhs` into `lhs` on the host,
  // iterated with dimension 0 innermost. `stream` is unused on the host.
  template <typename E1, typename E2>
  static void run(E1& lhs, const E2& rhs, stream_view stream)
  {
    // Hoist the extents once instead of querying lhs.shape() per iteration.
    const int ni = lhs.shape(0);
    const int nj = lhs.shape(1);
    const int nk = lhs.shape(2);
    const int nl = lhs.shape(3);
    const int nm = lhs.shape(4);
    const int nn = lhs.shape(5);
    const int no = lhs.shape(6);

    for (int o = 0; o != no; ++o)
      for (int n = 0; n != nn; ++n)
        for (int m = 0; m != nm; ++m)
          for (int l = 0; l != nl; ++l)
            for (int k = 0; k != nk; ++k)
              for (int j = 0; j != nj; ++j)
                for (int i = 0; i != ni; ++i)
                  lhs(i, j, k, l, m, n, o) = rhs(i, j, k, l, m, n, o);
  }
};

#if defined(GTENSOR_DEVICE_CUDA) || defined(GTENSOR_DEVICE_HIP)

#ifdef GTENSOR_PER_DIM_KERNELS
Expand Down Expand Up @@ -219,6 +244,27 @@ __global__ void kernel_assign_6(Elhs lhs, Erhs _rhs)
}
}

template <typename Elhs, typename Erhs>
__global__ void kernel_assign_7(Elhs lhs, Erhs _rhs)
{
  // Take a by-value copy so the kernel owns the rhs expression object.
  auto rhs = _rhs;
  int tidx = threadIdx.x + blockIdx.x * blockDim.x;
  int tidy = threadIdx.y + blockIdx.y * blockDim.y;
  int tidz = blockIdx.z;
  if (tidx < lhs.shape(0) * lhs.shape(1) &&
      tidy < lhs.shape(2) * lhs.shape(3)) {
    // x covers dims 0+1, y covers dims 2+3.
    int j = tidx / lhs.shape(0);
    int i = tidx % lhs.shape(0);
    int l = tidy / lhs.shape(2);
    int k = tidy % lhs.shape(2);
    // blockIdx.z linearizes dims 4-6 with extent shape(4)*shape(5)*shape(6)
    // (see the launch in assigner<7, space::device>), i.e.
    // tidz = m + n * shape(4) + o * shape(4) * shape(5).
    int o = tidz / (lhs.shape(4) * lhs.shape(5));
    int rem = tidz % (lhs.shape(4) * lhs.shape(5));
    int n = rem / lhs.shape(4);
    int m = rem % lhs.shape(4);

    // BUG FIX 1: original decomposed tidz with `tidz % shape(5)` /
    // `tidz / shape(5)`, which mis-maps indices whenever shape(4) != shape(5)
    // or shape(6) > 1.
    // BUG FIX 2: original rhs call passed only six indices
    // (i, j, k, l, m, o) — `n` was dropped.
    lhs(i, j, k, l, m, n, o) = rhs(i, j, k, l, m, n, o);
  }
}

template <>
struct assigner<1, space::device>
{
Expand Down Expand Up @@ -305,7 +351,7 @@ struct assigner<5, space::device>
template <typename E1, typename E2>
static void run(E1& lhs, const E2& rhs, stream_view stream)
{
// printf("assigner<6, device>\n");
// printf("assigner<5, device>\n");
dim3 numThreads(BS_X, BS_Y);
dim3 numBlocks((lhs.shape(0) * lhs.shape(1) + BS_X - 1) / BS_X,
(lhs.shape(2) * lhs.shape(3) + BS_Y - 1) / BS_Y,
Expand Down Expand Up @@ -341,6 +387,27 @@ struct assigner<6, space::device>
}
};

// 7-d device-side assignment: dims 0+1 are flattened onto the x
// thread/block dimension, dims 2+3 onto y, and dims 4-6 are linearized
// into blockIdx.z; kernel_assign_7 decomposes them back.
template <>
struct assigner<7, space::device>
{
  template <typename E1, typename E2>
  static void run(E1& lhs, const E2& rhs, stream_view stream)
  {
    // printf("assigner<7, device>\n");
    dim3 numThreads(BS_X, BS_Y);
    // Round x/y block counts up so partial tiles are covered; the kernel
    // bounds-checks tidx/tidy against the flattened extents.
    dim3 numBlocks((lhs.shape(0) * lhs.shape(1) + BS_X - 1) / BS_X,
                   (lhs.shape(2) * lhs.shape(3) + BS_Y - 1) / BS_Y,
                   lhs.shape(4) * lhs.shape(5) * lhs.shape(6));

    gpuSyncIfEnabledStream(stream);
    // std::cout << "rhs " << typeid(rhs.to_kernel()).name() << "\n";
    gtLaunchKernel(kernel_assign_7, numBlocks, numThreads, 0,
                   stream.get_backend_stream(), lhs.to_kernel(),
                   rhs.to_kernel());
    gpuSyncIfEnabledStream(stream);
  }
};

#else // not defined GTENSOR_PER_DIM_KERNELS

template <typename Elhs, typename Erhs, size_type N>
Expand Down
6 changes: 6 additions & 0 deletions include/gtensor/expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,12 @@ GT_INLINE decltype(auto) index_expression(E&& expr, shape_type<6> idx)
return expr(idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]);
}

// Index a 7-d expression with a shape_type<7> index tuple by unpacking
// it into a 7-argument operator() call; preserves the expression's
// reference category via decltype(auto).
template <typename E>
GT_INLINE decltype(auto) index_expression(E&& expr, shape_type<7> idx)
{
  return expr(idx[0], idx[1], idx[2], idx[3], idx[4], idx[5], idx[6]);
}

} // namespace gt

#endif
60 changes: 60 additions & 0 deletions include/gtensor/gtensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,27 @@ __global__ void kernel_launch(gt::shape_type<6> shape, F f)
}
}

// 7-d launch kernel: dims 0/1 come from the x/y thread grid, dims 2-6 are
// linearized into blockIdx.z with dim 2 fastest:
//   blockIdx.z = k + l*s2 + m*s2*s3 + n*s2*s3*s4 + o*s2*s3*s4*s5
template <typename F>
__global__ void kernel_launch(gt::shape_type<7> shape, F f)
{
  int i = threadIdx.x + blockIdx.x * BS_X;
  int j = threadIdx.y + blockIdx.y * BS_Y;
  int b = blockIdx.z;
  int o = b / (shape[2] * shape[3] * shape[4] * shape[5]);
  // BUG FIX: original read `hspae[5]` here (typo for `shape[5]`), which
  // does not compile.
  b -= o * (shape[2] * shape[3] * shape[4] * shape[5]);
  int n = b / (shape[2] * shape[3] * shape[4]);
  b -= n * (shape[2] * shape[3] * shape[4]);
  int m = b / (shape[2] * shape[3]);
  b -= m * (shape[2] * shape[3]);
  int l = b / shape[2];
  b -= l * shape[2];
  int k = b;

  // Guard against the rounded-up x/y grid overrunning the extents.
  if (i < shape[0] && j < shape[1]) {
    f(i, j, k, l, m, n, o);
  }
}

#else // not GTENSOR_PER_DIM_KERNELS

template <typename F, size_type N>
Expand Down Expand Up @@ -428,6 +449,30 @@ struct launch<6, space::host>
}
};

template <>
struct launch<7, space::host>
{
  // Host fallback for a 7-d launch: invoke f for every index tuple in
  // `shape`, dimension 0 innermost. `stream` is unused on the host.
  template <typename F>
  static void run(const gt::shape_type<7>& shape, F&& f, gt::stream_view stream)
  {
    for (int o = 0; o < shape[6]; o++) {
      for (int n = 0; n < shape[5]; n++) {
        for (int m = 0; m < shape[4]; m++) {
          for (int l = 0; l < shape[3]; l++) {
            for (int k = 0; k < shape[2]; k++) {
              for (int j = 0; j < shape[1]; j++) {
                for (int i = 0; i < shape[0]; i++) {
                  // FIX: do not std::forward inside the loop — forwarding
                  // casts f to an rvalue on every iteration, which is
                  // incorrect for callables whose invocation may consume
                  // state; a plain call is safe for all F.
                  f(i, j, k, l, m, n, o);
                }
              }
            }
          }
        }
      }
    }
  }
};

#if defined(GTENSOR_DEVICE_CUDA) || defined(GTENSOR_DEVICE_HIP)

#ifdef GTENSOR_PER_DIM_KERNELS
Expand Down Expand Up @@ -529,6 +574,21 @@ struct launch<6, space::device>
}
};

template <>
struct launch<7, space::device>
{
  // Device 7-d launch: dims 0/1 map to the x/y thread grid (rounded up to
  // whole blocks; the kernel bounds-checks), dims 2-6 are linearized into
  // blockIdx.z and decomposed in kernel_launch(gt::shape_type<7>, F).
  template <typename F>
  static void run(const gt::shape_type<7>& shape, F&& f, gt::stream_view stream)
  {
    dim3 numThreads(BS_X, BS_Y);
    dim3 numBlocks((shape[0] + BS_X - 1) / BS_X, (shape[1] + BS_Y - 1) / BS_Y,
                   shape[2] * shape[3] * shape[4] * shape[5] * shape[6]);

    gtLaunchKernel(kernel_launch, numBlocks, numThreads, 0,
                   stream.get_backend_stream(), shape, std::forward<F>(f));
  }
};

#else // not GTENSOR_PER_DIM_KERNELS

template <int N>
Expand Down
31 changes: 31 additions & 0 deletions include/gtensor/operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,37 @@ struct equals<6, 6, space::host, space::host>
}
};

template <>
struct equals<7, 7, space::host, space::host>
{
  // Deep equality for two 7-d host expressions: shapes must match and
  // every element must compare equal. Returns early on first mismatch.
  // Loop indices follow the i..o convention used by the 7-d assigners.
  template <typename E1, typename E2>
  static bool run(const E1& e1, const E2& e2)
  {
    if (e1.shape() != e2.shape()) {
      return false;
    }

    for (int o = 0; o < e1.shape(6); o++)
      for (int n = 0; n < e1.shape(5); n++)
        for (int m = 0; m < e1.shape(4); m++)
          for (int l = 0; l < e1.shape(3); l++)
            for (int k = 0; k < e1.shape(2); k++)
              for (int j = 0; j < e1.shape(1); j++)
                for (int i = 0; i < e1.shape(0); i++)
                  if (e1(i, j, k, l, m, n, o) != e2(i, j, k, l, m, n, o))
                    return false;

    return true;
  }
};

#ifdef GTENSOR_HAVE_DEVICE

template <size_type N1, size_type N2>
Expand Down
75 changes: 75 additions & 0 deletions tests/test_assign.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ TEST(assign, gtensor_6d)
EXPECT_EQ(a, b);
}

TEST(assign, gtensor_7d)
{
  gt::gtensor<int, 7> a(gt::shape(2, 3, 4, 5, 6, 7, 8));
  gt::gtensor<int, 7> b(a.shape());

  // Fill a with distinct values through its flat data pointer.
  int value = 0;
  for (auto* p = a.data(); p != a.data() + a.size(); ++p) {
    *p = value++;
  }

  EXPECT_NE(a, b);
  b = a;
  EXPECT_EQ(a, b);
}

TEST(assign, gview_1d_scalar)
{
auto a = gt::empty<int>(gt::shape(5));
Expand Down Expand Up @@ -81,6 +97,23 @@ TEST(assign, broadcast_6d)
}
}

TEST(assign, broadcast_7d)
{
  gt::gtensor<int, 7> a(gt::shape(8, 1, 2, 4, 1, 1, 6), 0);
  gt::gtensor<int, 7> b(gt::shape(8, 1, 2, 1, 1, 1, 6), -7);

  gt::assign(a, b);

  // b has extent 1 in dim 3, so it broadcasts across a's dim 3; every
  // element of a must now hold b's fill value.
  for (int i0 = 0; i0 < a.shape(0); i0++)
    for (int i2 = 0; i2 < a.shape(2); i2++)
      for (int i3 = 0; i3 < a.shape(3); i3++)
        for (int i6 = 0; i6 < a.shape(6); i6++)
          EXPECT_EQ(a(i0, 0, i2, i3, 0, 0, i6), -7);
}
#ifdef GTENSOR_HAVE_DEVICE

TEST(assign, device_gtensor_6d)
Expand All @@ -103,6 +136,26 @@ TEST(assign, device_gtensor_6d)
EXPECT_EQ(h_a, h_b);
}

TEST(assign, device_gtensor_7d)
{
  gt::gtensor_device<int, 7> a(gt::shape(2, 3, 4, 5, 6, 7, 8));
  gt::gtensor_device<int, 7> b(a.shape());
  gt::gtensor<int, 7> h_a(a.shape());
  gt::gtensor<int, 7> h_b(a.shape());

  // Fill the host source with distinct values.
  int value = 0;
  for (auto* p = h_a.data(); p != h_a.data() + h_a.size(); ++p) {
    *p = value++;
  }

  gt::copy(h_a, a);
  b = a; // device-side 7-d assignment under test
  gt::copy(b, h_b);

  EXPECT_EQ(h_a, h_b);
}

TEST(assign, device_gtensor_fill)
{
gt::gtensor_device<float, 2> a(gt::shape(2, 3));
Expand Down Expand Up @@ -403,4 +456,26 @@ TEST(assign, device_broadcast_6d)
}
}

TEST(assign, device_broadcast_7d)
{
  gt::gtensor_device<int, 7> a(gt::shape(8, 1, 2, 4, 1, 1, 4), 0);
  gt::gtensor_device<int, 7> b(gt::shape(8, 1, 2, 1, 1, 1, 4), -7);

  gt::gtensor<int, 7> h_a(a.shape());

  gt::assign(a, b);

  gt::copy(a, h_a);

  // b has extent 1 in dim 3, so it broadcasts across a's dim 3; every
  // element of a must now hold b's fill value.
  for (int i0 = 0; i0 < h_a.shape(0); i0++)
    for (int i2 = 0; i2 < h_a.shape(2); i2++)
      for (int i3 = 0; i3 < h_a.shape(3); i3++)
        for (int i6 = 0; i6 < h_a.shape(6); i6++)
          EXPECT_EQ(h_a(i0, 0, i2, i3, 0, 0, i6), -7);
}

#endif // GTENSOR_HAVE_DEVICE
48 changes: 48 additions & 0 deletions tests/test_launch.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,22 @@ void device_double_add_6d(gt::gtensor_device<double, 6>& a,
gt::copy(b, out);
}

// Compute out = a + a element-wise on the device via a 7-d gt::launch,
// then copy the result back to the host tensor `out`.
void device_double_add_7d(gt::gtensor_device<double, 7>& a,
                          gt::gtensor<double, 7>& out)
{
  auto b = gt::empty_like(a);

  auto k_a = a.to_kernel();
  auto k_b = b.to_kernel();

  gt::launch<7>(
    a.shape(), GT_LAMBDA(int i, int j, int k, int l, int m, int n, int o) {
      auto v = k_a(i, j, k, l, m, n, o);
      k_b(i, j, k, l, m, n, o) = v + v;
    });
  gt::copy(b, out);
}

TEST(gtensor, device_launch_5d)
{
gt::gtensor<double, 5> h_a(gt::shape(2, 2, 2, 2, 2));
Expand Down Expand Up @@ -242,4 +258,36 @@ TEST(gtensor, device_launch_6d)
EXPECT_EQ(h_b, h_b_expected);
}

TEST(gtensor, device_launch_7d)
{
  gt::gtensor<double, 7> h_a(gt::shape(2, 2, 2, 2, 2, 2, 2));
  gt::gtensor_device<double, 7> a(h_a.shape());
  gt::gtensor<double, 7> h_b(h_a.shape());
  gt::gtensor<double, 7> h_b_expected(h_a.shape());

  // Seed each element with the sum of its indices.
  for (int i0 = 0; i0 < h_a.shape(0); i0++)
    for (int i1 = 0; i1 < h_a.shape(1); i1++)
      for (int i2 = 0; i2 < h_a.shape(2); i2++)
        for (int i3 = 0; i3 < h_a.shape(3); i3++)
          for (int i4 = 0; i4 < h_a.shape(4); i4++)
            for (int i5 = 0; i5 < h_a.shape(5); i5++)
              for (int i6 = 0; i6 < h_a.shape(6); i6++)
                h_a(i0, i1, i2, i3, i4, i5, i6) =
                  i0 + i1 + i2 + i3 + i4 + i5 + i6;

  h_b_expected = 2 * h_a;

  gt::copy(h_a, a);

  device_double_add_7d(a, h_b);

  EXPECT_EQ(h_b, h_b_expected);
}

#endif // GTENSOR_HAVE_DEVICE

0 comments on commit bd980f5

Please sign in to comment.