diff --git a/include/gtensor/assign.h b/include/gtensor/assign.h index c834ff43..947c9b72 100644 --- a/include/gtensor/assign.h +++ b/include/gtensor/assign.h @@ -134,7 +134,6 @@ struct assigner<6, space::host> } }; - template <> struct assigner<7, space::host> { @@ -144,14 +143,14 @@ struct assigner<7, space::host> // printf("assigner<7, host>\n"); for (int o = 0; o < lhs.shape(6); o++) { for (int n = 0; n < lhs.shape(5); n++) { - for (int m = 0; m < lhs.shape(4); m++) { - for (int l = 0; l < lhs.shape(3); l++) { - for (int k = 0; k < lhs.shape(2); k++) { - for (int j = 0; j < lhs.shape(1); j++) { - for (int i = 0; i < lhs.shape(0); i++) { - lhs(i, j, k, l, m, n, o) = rhs(i, j, k, l, m, n, o); - } - } + for (int m = 0; m < lhs.shape(4); m++) { + for (int l = 0; l < lhs.shape(3); l++) { + for (int k = 0; k < lhs.shape(2); k++) { + for (int j = 0; j < lhs.shape(1); j++) { + for (int i = 0; i < lhs.shape(0); i++) { + lhs(i, j, k, l, m, n, o) = rhs(i, j, k, l, m, n, o); + } + } } } } @@ -257,8 +256,7 @@ __global__ void kernel_assign_7(Elhs lhs, Erhs _rhs) int j = tidx / lhs.shape(0), i = tidx % lhs.shape(0); int l = tidy / lhs.shape(2), k = tidy % lhs.shape(2); int m = tidz % lhs.shape(5) % lhs.shape(4); - int n = tidz % lhs.shape(5) / lhs.shape(4), - int o = tidz / lhs.shape(5); + int n = tidz % lhs.shape(5) / lhs.shape(4), int o = tidz / lhs.shape(5); lhs(i, j, k, l, m, n, o) = rhs(i, j, k, l, m, o); } diff --git a/include/gtensor/expression.h b/include/gtensor/expression.h index 0c867fc0..c3580254 100644 --- a/include/gtensor/expression.h +++ b/include/gtensor/expression.h @@ -124,7 +124,7 @@ GT_INLINE decltype(auto) index_expression(E&& expr, shape_type<6> idx) return expr(idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]); } - template +template GT_INLINE decltype(auto) index_expression(E&& expr, shape_type<7> idx) { return expr(idx[0], idx[1], idx[2], idx[3], idx[4], idx[5], idx[6]); diff --git a/include/gtensor/operator.h b/include/gtensor/operator.h index df0acea2..3b8bb5f7 100644 --- a/include/gtensor/operator.h +++ b/include/gtensor/operator.h @@ -355,7 +355,6 @@ struct equals<6, 6, space::host, space::host> } }; - template <> struct equals<7, 7, space::host, space::host> { @@ -368,14 +367,14 @@ struct equals<7, 7, space::host, space::host> for (int v = 0; v < e1.shape(6); v++) { for (int z = 0; z < e1.shape(5); z++) { - for (int y = 0; y < e1.shape(4); y++) { - for (int x = 0; x < e1.shape(3); x++) { - for (int k = 0; k < e1.shape(2); k++) { - for (int j = 0; j < e1.shape(1); j++) { - for (int i = 0; i < e1.shape(0); i++) { - if (e1(i, j, k, x, y, z, v) != e2(i, j, k, x, y, z, v)) { - return false; - } + for (int y = 0; y < e1.shape(4); y++) { + for (int x = 0; x < e1.shape(3); x++) { + for (int k = 0; k < e1.shape(2); k++) { + for (int j = 0; j < e1.shape(1); j++) { + for (int i = 0; i < e1.shape(0); i++) { + if (e1(i, j, k, x, y, z, v) != e2(i, j, k, x, y, z, v)) { + return false; + } } } } diff --git a/tests/test_launch.cxx b/tests/test_launch.cxx index 928c1b3a..0b86cb95 100644 --- a/tests/test_launch.cxx +++ b/tests/test_launch.cxx @@ -193,8 +193,9 @@ void device_double_add_7d(gt::gtensor_device& a, auto k_b = b.to_kernel(); gt::launch<7>( - a.shape(), GT_LAMBDA(int i, int j, int k, int l, int m, int n, int o) { - k_b(i, j, k, l, m, n, o) = k_a(i, j, k, l, m, n, o) + k_a(i, j, k, l, m, n, o); + a.shape(), GT_LAMBDA(int i, int j, int k, int l, int m, int n, int o) { + k_b(i, j, k, l, m, n, o) = + k_a(i, j, k, l, m, n, o) + k_a(i, j, k, l, m, n, o); }); gt::copy(b, out); } @@ -270,9 +271,9 @@ TEST(gtensor, device_launch_7d) for (int l = 0; l < h_a.shape(3); l++) { for (int m = 0; m < h_a.shape(4); m++) { for (int n = 0; n < h_a.shape(5); n++) { - for (int o = 0; n < h_a.shape(6); o++) { - h_a(i, j, k, l, m, n, o) = i + j + k + l + m + n +o; - } + for (int o = 0; n < h_a.shape(6); o++) { + h_a(i, j, k, l, m, n, o) = i + j + k + l + m + n + o; + } } } }