From a6971bb4775a22209924f1a6435ce3cc4fd34684 Mon Sep 17 00:00:00 2001 From: Carl-Martin Pfeiler Date: Thu, 17 Aug 2023 17:00:46 +0200 Subject: [PATCH 01/38] [test] Test gt::half scalar arithmetic (+,-,*,/) --- tests/test_half.cxx | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tests/test_half.cxx diff --git a/tests/test_half.cxx b/tests/test_half.cxx new file mode 100644 index 00000000..43a4aafe --- /dev/null +++ b/tests/test_half.cxx @@ -0,0 +1,30 @@ +#include + +#include + +#include + +TEST(half, ScalarArithmetic) +{ + gt::half a{1.0}; + gt::half b{2.0}; + + gt::half c{0.0}; + gt::half ref{0.0}; + + c = a + b; + ref = 3.0; + EXPECT_EQ(c, ref); + + c = a - b; + ref = -1.0; + EXPECT_EQ(c, ref); + + c = a * b; + ref = 2.0; + EXPECT_EQ(c, ref); + + c = a / b; + ref = 0.5; + EXPECT_EQ(c, ref); +} From f402b3e5dd17fdde279368511ae465c4d5a802ba Mon Sep 17 00:00:00 2001 From: Carl-Martin Pfeiler Date: Thu, 17 Aug 2023 17:07:55 +0200 Subject: [PATCH 02/38] [test] Register test_half --- tests/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a5e59062..90538c02 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -43,6 +43,7 @@ add_gtensor_test(test_space) add_gtensor_test(test_stream) add_gtensor_test(test_gtest_predicates) add_gtensor_test(test_sparse) +add_gtensor_test(test_half) if (GTENSOR_ENABLE_CLIB) add_executable(test_clib) From 49b1c3f6ff39c60d1dcfd873f6f74606d35e05c9 Mon Sep 17 00:00:00 2001 From: Carl-Martin Pfeiler Date: Thu, 17 Aug 2023 17:18:58 +0200 Subject: [PATCH 03/38] [feat] Prototype for gt::half as of #271 --- include/gtensor/half.h | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 include/gtensor/half.h diff --git a/include/gtensor/half.h b/include/gtensor/half.h new file mode 100644 index 00000000..8fa7a44a --- /dev/null +++ b/include/gtensor/half.h @@ -0,0 +1,46 @@ +// half_wrapper.hxx +#ifndef HALF_WRAPPER +#define HALF_WRAPPER + +#include +#include + +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) +#define TARGET_ARCH __host__ __device__ +#else +#define TARGET_ARCH +#endif + +class HalfWrapper +{ +public: + TARGET_ARCH HalfWrapper(float x) : x(x) {}; + TARGET_ARCH HalfWrapper(half x) : x(x) {}; + TARGET_ARCH const HalfWrapper& operator=(const float f) { x = f; return *this; } + TARGET_ARCH const half& Get() const { return x; } +private: + half x; +}; + +TARGET_ARCH const HalfWrapper operator+(const HalfWrapper& lhs, const HalfWrapper& rhs) +{ +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) +return HalfWrapper( lhs.Get() + rhs.Get() ); +#else +return HalfWrapper( float(lhs.Get()) + float(rhs.Get()) ); +#endif +} + +TARGET_ARCH const HalfWrapper operator*(const HalfWrapper& lhs, const HalfWrapper& rhs) +{ +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) +return HalfWrapper( lhs.Get() * rhs.Get() ); +#else +return HalfWrapper( float(lhs.Get()) * float(rhs.Get()) ); +#endif +} + +std::ostream& operator<<(std::ostream& s, const HalfWrapper& h) +{ s << (float) h.Get(); return s; } + +#endif From e26f092e16181e7a7d27c640a22982662e537ede Mon Sep 17 00:00:00 2001 From: Carl-Martin Pfeiler Date: Thu, 17 Aug 2023 18:02:35 +0200 Subject: [PATCH 04/38] [clean] Namespace and renaming --- include/gtensor/half.h | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index 8fa7a44a..748177b0 100644 --- 
a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -1,46 +1,52 @@ -// half_wrapper.hxx -#ifndef HALF_WRAPPER -#define HALF_WRAPPER +#ifndef GTENSOR_HALF_H +#define GTENSOR_HALF_H #include #include +namespace gt +{ + +// ====================================================================== +// half #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) #define TARGET_ARCH __host__ __device__ #else #define TARGET_ARCH #endif -class HalfWrapper +class half { public: - TARGET_ARCH HalfWrapper(float x) : x(x) {}; - TARGET_ARCH HalfWrapper(half x) : x(x) {}; - TARGET_ARCH const HalfWrapper& operator=(const float f) { x = f; return *this; } - TARGET_ARCH const half& Get() const { return x; } + TARGET_ARCH half(float x) : x(x) {}; + TARGET_ARCH half(__half x) : x(x) {}; + TARGET_ARCH const half& operator=(const float f) { x = f; return *this; } + TARGET_ARCH const __half& Get() const { return x; } private: - half x; + __half x; }; -TARGET_ARCH const HalfWrapper operator+(const HalfWrapper& lhs, const HalfWrapper& rhs) +TARGET_ARCH const half operator+(const half& lhs, const half& rhs) { #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) -return HalfWrapper( lhs.Get() + rhs.Get() ); + return half( lhs.Get() + rhs.Get() ); #else -return HalfWrapper( float(lhs.Get()) + float(rhs.Get()) ); + return half( float(lhs.Get()) + float(rhs.Get()) ); #endif } -TARGET_ARCH const HalfWrapper operator*(const HalfWrapper& lhs, const HalfWrapper& rhs) +TARGET_ARCH const half operator*(const half& lhs, const half& rhs) { #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) -return HalfWrapper( lhs.Get() * rhs.Get() ); + return half( lhs.Get() * rhs.Get() ); #else -return HalfWrapper( float(lhs.Get()) * float(rhs.Get()) ); + return half( float(lhs.Get()) * float(rhs.Get()) ); #endif } -std::ostream& operator<<(std::ostream& s, const HalfWrapper& h) +std::ostream& operator<<(std::ostream& s, const half& h) { s << (float) h.Get(); return s; } +} // namespace gt + #endif From 93effea725cf630d49e87695c269294a8d5d3713 Mon Sep 17 00:00:00 2001 From: Carl-Martin Pfeiler Date: Thu, 17 Aug 2023 18:04:24 +0200 Subject: [PATCH 05/38] [feat] Further operators -,/,== for test_half --- include/gtensor/half.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index 748177b0..c46f1851 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -35,6 +35,15 @@ TARGET_ARCH const half operator+(const half& lhs, const half& rhs) #endif } +TARGET_ARCH const half operator-(const half& lhs, const half& rhs) +{ +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) + return half( lhs.Get() - rhs.Get() ); +#else + return half( float(lhs.Get()) - float(rhs.Get()) ); +#endif +} + TARGET_ARCH const half operator*(const half& lhs, const half& rhs) { #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) @@ -44,6 +53,24 @@ TARGET_ARCH const half operator*(const half& lhs, const half& rhs) #endif } +TARGET_ARCH const half operator/(const half& lhs, const half& rhs) +{ +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) + return half( lhs.Get() / rhs.Get() ); +#else + return half( float(lhs.Get()) / float(rhs.Get()) ); +#endif +} + +TARGET_ARCH bool operator==(const half& lhs, const half& rhs) +{ +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) + return lhs.Get() == rhs.Get(); +#else + return float(lhs.Get()) == float(rhs.Get()); +#endif +} + std::ostream& operator<<(std::ostream& s, const half& h) { s << (float) h.Get(); return s; } From 
465a4653b6a3801b1d80cae48f8a71686aecd741 Mon Sep 17 00:00:00 2001 From: Carl-Martin Pfeiler Date: Fri, 18 Aug 2023 15:14:42 +0200 Subject: [PATCH 06/38] [test] Host auto initialize; test needs operator!= + NOTE: Straightforward auto-initialization doesn't work on device, i.e., doesn't compile for gt::gtensor --> will be documented in one of the next commits --- include/gtensor/half.h | 5 +++++ tests/test_half.cxx | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index c46f1851..e54abb03 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -71,6 +71,11 @@ TARGET_ARCH bool operator==(const half& lhs, const half& rhs) #endif } +TARGET_ARCH bool operator!=(const half& lhs, const half& rhs) +{ + return !(lhs == rhs); +} + std::ostream& operator<<(std::ostream& s, const half& h) { s << (float) h.Get(); return s; } diff --git a/tests/test_half.cxx b/tests/test_half.cxx index 43a4aafe..da4945d6 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -28,3 +28,22 @@ TEST(half, ScalarArithmetic) ref = 0.5; EXPECT_EQ(c, ref); } + +template +void generic_fill_1d(gt::gtensor& x, const gt::half& fill_value) +{ + auto k_x = x.to_kernel(); + + gt::launch<1, S>(x.shape(), GT_LAMBDA(int i) { k_x(i) = fill_value; }); +} + +TEST(half, AutoInitHost) +{ + gt::half fill_value{1.25}; + gt::gtensor a(gt::shape(5), fill_value); + gt::gtensor b(a.shape()); + + generic_fill_1d(b, fill_value); + + EXPECT_EQ(a, b); +} From 4d2852a63b2147d261b8d4c6a4aa5eca8d818df6 Mon Sep 17 00:00:00 2001 From: Carl-Martin Pfeiler Date: Fri, 18 Aug 2023 15:37:21 +0200 Subject: [PATCH 07/38] [test] Explicit kernel haxpy on host + device + NOTE: Need to use cmake with CMAKE_CUDA_ARCHITECTURES flag. E.g., -DCMAKE_CUDA_ARCHITECTURES=80 for A100 [some value >= 53 to properly use CUDA __half type] + TODO: Later maybe this can be taken care of in CMakeLists.txt --- tests/test_half.cxx | 54 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/test_half.cxx b/tests/test_half.cxx index da4945d6..3aab6aee 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -47,3 +47,57 @@ TEST(half, AutoInitHost) EXPECT_EQ(a, b); } + +void host_explicit_haxpy_1d(const gt::half& a, + const gt::gtensor& x, + gt::gtensor& y) +{ + auto k_x = x.to_kernel(); + auto k_y = y.to_kernel(); + + gt::launch_host<1>( + y.shape(), GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); +} + +TEST(half, HaxpyExplicit1dHost) +{ + gt::gtensor x(gt::shape(3), 1.5); + gt::gtensor y(x.shape(), 2.5); + gt::half a{0.5}; + gt::gtensor ref(x.shape(), 3.25); + + host_explicit_haxpy_1d(a, x, y); + + EXPECT_EQ(y, ref); +} + +template +void generic_explicit_haxpy_1d( const gt::half& a, + const gt::gtensor& x, + gt::gtensor& y) +{ + auto k_x = x.to_kernel(); + auto k_y = y.to_kernel(); + + gt::launch<1, S>( + y.shape(), GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); +} + +TEST(half, HaxpyExplicit1dDevice) +{ + gt::gtensor h_x(gt::shape(3), gt::half{1.5}); + gt::gtensor h_y(h_x.shape(), gt::half{2.5}); + gt::half a{0.5}; + gt::gtensor ref(h_x.shape(), 3.25); + + gt::gtensor d_x(h_x.shape()); + gt::gtensor d_y(h_y.shape()); + gt::copy(h_x, d_x); + gt::copy(h_y, d_y); + + generic_explicit_haxpy_1d(a, d_x, d_y); + + gt::copy(d_y, h_y); + + EXPECT_EQ(h_y, ref); +} From 3856889dfa95c557729be7b72b3d77a6c3893df1 Mon Sep 17 00:00:00 2001 From: Carl-Martin Pfeiler Date: Fri, 18 Aug 2023 16:13:53 +0200 Subject: [PATCH 08/38] [test] Documents failure of 
device half auto init + See NOTE in commit db3eec8 + Works for host+half, host+float, device+float + Fails for device+half --- tests/CMakeLists.txt | 1 + tests/test_half_failing.cxx | 75 +++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 tests/test_half_failing.cxx diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 90538c02..396b5caa 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -44,6 +44,7 @@ add_gtensor_test(test_stream) add_gtensor_test(test_gtest_predicates) add_gtensor_test(test_sparse) add_gtensor_test(test_half) +add_gtensor_test(test_half_failing) if (GTENSOR_ENABLE_CLIB) add_executable(test_clib) diff --git a/tests/test_half_failing.cxx b/tests/test_half_failing.cxx new file mode 100644 index 00000000..6a5c82b8 --- /dev/null +++ b/tests/test_half_failing.cxx @@ -0,0 +1,75 @@ +#include + +#include + +#include + +template +void generic_fill_1d(gt::gtensor& x, const fp_type& fill_value) +{ + auto k_x = x.to_kernel(); + + gt::launch<1, S>( + x.shape(), GT_LAMBDA(int i) { k_x(i) = fill_value; }); +} + +TEST(halfFailing, AutoInitHostFloat) +{ + float fill_value{1.25}; + gt::gtensor a(gt::shape(5), fill_value); + gt::gtensor b(a.shape()); + + generic_fill_1d(b, fill_value); + + EXPECT_EQ(a, b); +} + +TEST(halfFailing, AutoInitHostHalf) +{ + gt::half fill_value{1.25}; + gt::gtensor a(gt::shape(5), fill_value); + gt::gtensor b(a.shape()); + + generic_fill_1d(b, fill_value); + + EXPECT_EQ(a, b); +} + +TEST(halfFailing, AutoInitDeviceFloat) +{ + float fill_value{1.25}; + gt::gtensor d_a(gt::shape(5), fill_value); + gt::gtensor d_b(d_a.shape()); + + generic_fill_1d(d_b, fill_value); + + gt::gtensor h_a(d_a.shape()); + gt::copy(d_a, h_a); + gt::gtensor h_b(d_a.shape()); + gt::copy(d_b, h_b); + + EXPECT_EQ(h_a, h_b); +} + +TEST(halfFailing, AutoInitDeviceHalf) +{ + gt::half fill_value{1.25}; + // DOES NOT COMPILE !!! 
---------------------------------------------------- + // gt::gtensor d_a(gt::shape(5), fill_value); + EXPECT_EQ(true, false); + // temporary workaround: + gt::gtensor d_a(gt::shape(5)); + generic_fill_1d(d_a, fill_value); + // ------------------------------------------------------------------------- + + gt::gtensor d_b(d_a.shape()); + + generic_fill_1d(d_b, fill_value); + + gt::gtensor h_a(d_a.shape()); + gt::copy(d_a, h_a); + gt::gtensor h_b(d_a.shape()); + gt::copy(d_b, h_b); + + EXPECT_EQ(h_a, h_b); +} From f6d04c0b59180dd629598c2838af448511a55ae4 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Mon, 21 Aug 2023 13:32:55 +0200 Subject: [PATCH 09/38] [test] Implicit kernel haxpy on host + device --- tests/test_half.cxx | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/test_half.cxx b/tests/test_half.cxx index 3aab6aee..dbd60bd8 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -101,3 +101,34 @@ TEST(half, HaxpyExplicit1dDevice) EXPECT_EQ(h_y, ref); } + +TEST(half, HaxpyImplicit1dHost) +{ + gt::gtensor x(gt::shape(3), 1.5); + gt::gtensor y(x.shape(), 2.5); + gt::half a{0.5}; + gt::gtensor ref(x.shape(), 3.25); + + y = a * x + y; + + EXPECT_EQ(y, ref); +} + +TEST(half, HaxpyImplicit1dDevice) +{ + gt::gtensor h_x(gt::shape(3), 1.5); + gt::gtensor h_y(h_x.shape(), 2.5); + gt::half a{0.5}; + gt::gtensor ref(h_x.shape(), 3.25); + + gt::gtensor d_x(h_x.shape()); + gt::gtensor d_y(h_y.shape()); + gt::copy(h_x, d_x); + gt::copy(h_y, d_y); + + d_y = a * d_x + d_y; + + gt::copy(d_y, h_y); + + EXPECT_EQ(h_y, ref); +} From f8998920909430c9f20ac700f5f09d0c0242f501 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Mon, 21 Aug 2023 14:15:10 +0200 Subject: [PATCH 10/38] [fix/test] Fix failure of device half auto init + Fixes problem decribed in e5ea095 by enabling default constructor of gt::half --- include/gtensor/half.h | 1 + tests/test_half_failing.cxx | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index e54abb03..742d23ce 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -18,6 +18,7 @@ namespace gt class half { public: + half() = default; TARGET_ARCH half(float x) : x(x) {}; TARGET_ARCH half(__half x) : x(x) {}; TARGET_ARCH const half& operator=(const float f) { x = f; return *this; } diff --git a/tests/test_half_failing.cxx b/tests/test_half_failing.cxx index 6a5c82b8..a02b3807 100644 --- a/tests/test_half_failing.cxx +++ b/tests/test_half_failing.cxx @@ -55,11 +55,11 @@ TEST(halfFailing, AutoInitDeviceHalf) { gt::half fill_value{1.25}; // DOES NOT COMPILE !!! 
---------------------------------------------------- - // gt::gtensor d_a(gt::shape(5), fill_value); - EXPECT_EQ(true, false); - // temporary workaround: - gt::gtensor d_a(gt::shape(5)); - generic_fill_1d(d_a, fill_value); + gt::gtensor d_a(gt::shape(5), fill_value); +// EXPECT_EQ(true, false); +// // temporary workaround: +// gt::gtensor d_a(gt::shape(5)); +// generic_fill_1d(d_a, fill_value); // ------------------------------------------------------------------------- gt::gtensor d_b(d_a.shape()); From fb56bb594ccc9611d0ddb7a5e54dc163e53ffc9c Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Mon, 21 Aug 2023 14:26:11 +0200 Subject: [PATCH 11/38] [test] Remove test_half_failing + Since device half auto init works since last commit + Corresponding test added to test_half --- tests/CMakeLists.txt | 1 - tests/test_half.cxx | 16 ++++++++ tests/test_half_failing.cxx | 75 ------------------------------------- 3 files changed, 16 insertions(+), 76 deletions(-) delete mode 100644 tests/test_half_failing.cxx diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 396b5caa..90538c02 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -44,7 +44,6 @@ add_gtensor_test(test_stream) add_gtensor_test(test_gtest_predicates) add_gtensor_test(test_sparse) add_gtensor_test(test_half) -add_gtensor_test(test_half_failing) if (GTENSOR_ENABLE_CLIB) add_executable(test_clib) diff --git a/tests/test_half.cxx b/tests/test_half.cxx index dbd60bd8..b262c29e 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -48,6 +48,22 @@ TEST(half, AutoInitHost) EXPECT_EQ(a, b); } +TEST(half, AutoInitDevice) +{ + gt::half fill_value{1.25}; + gt::gtensor d_a(gt::shape(5), fill_value); + gt::gtensor d_b(d_a.shape()); + + generic_fill_1d(d_b, fill_value); + + gt::gtensor h_a(d_a.shape()); + gt::gtensor h_b(d_b.shape()); + gt::copy(d_a, h_a); + gt::copy(d_b, h_b); + + EXPECT_EQ(h_a, h_b); +} + void host_explicit_haxpy_1d(const gt::half& a, const gt::gtensor& x, gt::gtensor& y) diff --git a/tests/test_half_failing.cxx b/tests/test_half_failing.cxx deleted file mode 100644 index a02b3807..00000000 --- a/tests/test_half_failing.cxx +++ /dev/null @@ -1,75 +0,0 @@ -#include - -#include - -#include - -template -void generic_fill_1d(gt::gtensor& x, const fp_type& fill_value) -{ - auto k_x = x.to_kernel(); - - gt::launch<1, S>( - x.shape(), GT_LAMBDA(int i) { k_x(i) = fill_value; }); -} - -TEST(halfFailing, AutoInitHostFloat) -{ - float fill_value{1.25}; - gt::gtensor a(gt::shape(5), fill_value); - gt::gtensor b(a.shape()); - - generic_fill_1d(b, fill_value); - - EXPECT_EQ(a, b); -} - -TEST(halfFailing, AutoInitHostHalf) -{ - gt::half fill_value{1.25}; - gt::gtensor a(gt::shape(5), fill_value); - gt::gtensor b(a.shape()); - - generic_fill_1d(b, fill_value); - - EXPECT_EQ(a, b); -} - -TEST(halfFailing, AutoInitDeviceFloat) -{ - float fill_value{1.25}; - gt::gtensor d_a(gt::shape(5), fill_value); - gt::gtensor d_b(d_a.shape()); - - generic_fill_1d(d_b, fill_value); - - gt::gtensor h_a(d_a.shape()); - gt::copy(d_a, h_a); - gt::gtensor h_b(d_a.shape()); - gt::copy(d_b, h_b); - - EXPECT_EQ(h_a, h_b); -} - -TEST(halfFailing, AutoInitDeviceHalf) -{ - gt::half fill_value{1.25}; - // DOES NOT COMPILE !!! 
---------------------------------------------------- - gt::gtensor d_a(gt::shape(5), fill_value); -// EXPECT_EQ(true, false); -// // temporary workaround: -// gt::gtensor d_a(gt::shape(5)); -// generic_fill_1d(d_a, fill_value); - // ------------------------------------------------------------------------- - - gt::gtensor d_b(d_a.shape()); - - generic_fill_1d(d_b, fill_value); - - gt::gtensor h_a(d_a.shape()); - gt::copy(d_a, h_a); - gt::gtensor h_b(d_a.shape()); - gt::copy(d_b, h_b); - - EXPECT_EQ(h_a, h_b); -} From 25608b829773ac7396576cede608e02e2704b919 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Mon, 21 Aug 2023 14:47:48 +0200 Subject: [PATCH 12/38] [test] Cleaner device tests with auto init --- tests/test_half.cxx | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/tests/test_half.cxx b/tests/test_half.cxx index b262c29e..4ff22826 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -101,15 +101,11 @@ void generic_explicit_haxpy_1d( const gt::half& a, TEST(half, HaxpyExplicit1dDevice) { - gt::gtensor h_x(gt::shape(3), gt::half{1.5}); - gt::gtensor h_y(h_x.shape(), gt::half{2.5}); + gt::gtensor d_x(gt::shape(3), 1.5); + gt::gtensor d_y(d_x.shape(), 2.5); gt::half a{0.5}; - gt::gtensor ref(h_x.shape(), 3.25); - - gt::gtensor d_x(h_x.shape()); - gt::gtensor d_y(h_y.shape()); - gt::copy(h_x, d_x); - gt::copy(h_y, d_y); + gt::gtensor ref(d_y.shape(), 3.25); + gt::gtensor h_y(d_y.shape()); generic_explicit_haxpy_1d(a, d_x, d_y); @@ -132,15 +128,11 @@ TEST(half, HaxpyImplicit1dHost) TEST(half, HaxpyImplicit1dDevice) { - gt::gtensor h_x(gt::shape(3), 1.5); - gt::gtensor h_y(h_x.shape(), 2.5); + gt::gtensor d_x(gt::shape(3), 1.5); + gt::gtensor d_y(d_x.shape(), 2.5); gt::half a{0.5}; - gt::gtensor ref(h_x.shape(), 3.25); - - gt::gtensor d_x(h_x.shape()); - gt::gtensor d_y(h_y.shape()); - gt::copy(h_x, d_x); - gt::copy(h_y, d_y); + gt::gtensor ref(d_y.shape(), 3.25); + gt::gtensor h_y(d_y.shape()); d_y = a * d_x + d_y; From 8356a023e600110e1feea756e7a0fef86f01f642 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Mon, 21 Aug 2023 14:50:06 +0200 Subject: [PATCH 13/38] [clean] Uniform indentation in test_half --- tests/test_half.cxx | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_half.cxx b/tests/test_half.cxx index 4ff22826..a084ae4e 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -32,9 +32,9 @@ TEST(half, ScalarArithmetic) template void generic_fill_1d(gt::gtensor& x, const gt::half& fill_value) { - auto k_x = x.to_kernel(); + auto k_x = x.to_kernel(); - gt::launch<1, S>(x.shape(), GT_LAMBDA(int i) { k_x(i) = fill_value; }); + gt::launch<1, S>(x.shape(), GT_LAMBDA(int i) { k_x(i) = fill_value; }); } TEST(half, AutoInitHost) @@ -68,11 +68,11 @@ void host_explicit_haxpy_1d(const gt::half& a, const gt::gtensor& x, gt::gtensor& y) { - auto k_x = x.to_kernel(); - auto k_y = y.to_kernel(); + auto k_x = x.to_kernel(); + auto k_y = y.to_kernel(); - gt::launch_host<1>( - y.shape(), GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); + gt::launch_host<1>( + y.shape(), GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); } TEST(half, HaxpyExplicit1dHost) @@ -92,11 +92,11 @@ void generic_explicit_haxpy_1d( const gt::half& a, const gt::gtensor& x, gt::gtensor& y) { - auto k_x = x.to_kernel(); - auto k_y = y.to_kernel(); + auto k_x = x.to_kernel(); + auto k_y = y.to_kernel(); - gt::launch<1, S>( - y.shape(), GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); + gt::launch<1, S>( + y.shape(), 
GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); } TEST(half, HaxpyExplicit1dDevice) From 59d3c90cfcc3138acbbfef7db2104085360a7a33 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Mon, 21 Aug 2023 16:01:48 +0200 Subject: [PATCH 14/38] [refactor] Reduce # of directives --- include/gtensor/half.h | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index 742d23ce..3a45a2c7 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -11,8 +11,10 @@ namespace gt // half #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) #define TARGET_ARCH __host__ __device__ +using compute_type = __half; #else #define TARGET_ARCH +using compute_type = float; #endif class half @@ -22,54 +24,34 @@ class half TARGET_ARCH half(float x) : x(x) {}; TARGET_ARCH half(__half x) : x(x) {}; TARGET_ARCH const half& operator=(const float f) { x = f; return *this; } - TARGET_ARCH const __half& Get() const { return x; } + TARGET_ARCH compute_type Get() const { return static_cast(x); } private: __half x; }; TARGET_ARCH const half operator+(const half& lhs, const half& rhs) { -#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) return half( lhs.Get() + rhs.Get() ); -#else - return half( float(lhs.Get()) + float(rhs.Get()) ); -#endif } TARGET_ARCH const half operator-(const half& lhs, const half& rhs) { -#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) return half( lhs.Get() - rhs.Get() ); -#else - return half( float(lhs.Get()) - float(rhs.Get()) ); -#endif } TARGET_ARCH const half operator*(const half& lhs, const half& rhs) { -#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) return half( lhs.Get() * rhs.Get() ); -#else - return half( float(lhs.Get()) * float(rhs.Get()) ); -#endif } TARGET_ARCH const half operator/(const half& lhs, const half& rhs) { -#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) return half( lhs.Get() / rhs.Get() ); -#else - return half( float(lhs.Get()) / float(rhs.Get()) ); -#endif } TARGET_ARCH bool operator==(const half& lhs, const half& rhs) { -#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) return lhs.Get() == rhs.Get(); -#else - return float(lhs.Get()) == float(rhs.Get()); -#endif } TARGET_ARCH bool operator!=(const half& lhs, const half& rhs) From 7937c8ca5efd8768c30ba76ae1abc7b2e81a5677 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Tue, 22 Aug 2023 10:31:58 +0200 Subject: [PATCH 15/38] [refactor] Macro for arithmetic operators --- include/gtensor/half.h | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index 3a45a2c7..fed3c745 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -23,31 +23,21 @@ class half half() = default; TARGET_ARCH half(float x) : x(x) {}; TARGET_ARCH half(__half x) : x(x) {}; + TARGET_ARCH const half& operator=(const float f) { x = f; return *this; } TARGET_ARCH compute_type Get() const { return static_cast(x); } private: __half x; }; -TARGET_ARCH const half operator+(const half& lhs, const half& rhs) -{ - return half( lhs.Get() + rhs.Get() ); -} - -TARGET_ARCH const half operator-(const half& lhs, const half& rhs) -{ - return half( lhs.Get() - rhs.Get() ); -} +#define PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(op) \ + TARGET_ARCH const half operator op(const half& lhs, const half& rhs) \ + { return half( lhs.Get() op rhs.Get() ); } -TARGET_ARCH const half operator*(const half& lhs, const half& rhs) -{ - return half( lhs.Get() * rhs.Get() ); -} - -TARGET_ARCH 
const half operator/(const half& lhs, const half& rhs) -{ - return half( lhs.Get() / rhs.Get() ); -} +PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(+); +PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(-); +PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(*); +PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(/); TARGET_ARCH bool operator==(const half& lhs, const half& rhs) { From f868ce8d42ae4029bb4244261a747be25887ede9 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Tue, 22 Aug 2023 10:37:49 +0200 Subject: [PATCH 16/38] [test] Test gt::half comparisons (==,!=,<,<=,>,>=) --- tests/test_half.cxx | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/test_half.cxx b/tests/test_half.cxx index a084ae4e..576139dd 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -29,6 +29,46 @@ TEST(half, ScalarArithmetic) EXPECT_EQ(c, ref); } +TEST(half, BinaryComparisonOperators) +{ + gt::half a{1.0}; + gt::half b{2.0}; + gt::half c{2.0}; + + EXPECT_EQ(a, a); + EXPECT_EQ(b, b); + EXPECT_EQ(b, c); + EXPECT_EQ(c, b); + EXPECT_EQ(c, c); + + EXPECT_NE(a, b); + EXPECT_NE(a, c); + EXPECT_NE(b, a); + EXPECT_NE(c, a); + + EXPECT_LT(a, b); + EXPECT_LT(a, c); + + EXPECT_LE(a, a); + EXPECT_LE(a, b); + EXPECT_LE(a, c); + EXPECT_LE(b, b); + EXPECT_LE(b, c); + EXPECT_LE(c, b); + EXPECT_LE(c, c); + + EXPECT_GT(b, a); + EXPECT_GT(c, a); + + EXPECT_GE(a, a); + EXPECT_GE(b, a); + EXPECT_GE(b, b); + EXPECT_GE(b, c); + EXPECT_GE(c, a); + EXPECT_GE(c, b); + EXPECT_GE(c, c); +} + template void generic_fill_1d(gt::gtensor& x, const gt::half& fill_value) { From 2a18632193a4e429791cfa6d536732ae38300f50 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Tue, 22 Aug 2023 10:38:49 +0200 Subject: [PATCH 17/38] [feat] Comparisons gt::half (==,!=,<,<=,>,>=) --- include/gtensor/half.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index fed3c745..9d76ad2c 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -49,6 +49,26 @@ TARGET_ARCH bool operator!=(const half& lhs, const half& rhs) return !(lhs == rhs); } +TARGET_ARCH bool operator<(const half& lhs, const half& rhs) +{ + return lhs.Get() < rhs.Get(); +} + +TARGET_ARCH bool operator<=(const half& lhs, const half& rhs) +{ + return lhs.Get() <= rhs.Get(); +} + +TARGET_ARCH bool operator>(const half& lhs, const half& rhs) +{ + return lhs.Get() > rhs.Get(); +} + +TARGET_ARCH bool operator>=(const half& lhs, const half& rhs) +{ + return lhs.Get() >= rhs.Get(); +} + std::ostream& operator<<(std::ostream& s, const half& h) { s << (float) h.Get(); return s; } From c4eeb25bf5ea1b449cdaab047f00557a516b2ef7 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Tue, 22 Aug 2023 10:42:32 +0200 Subject: [PATCH 18/38] [refactor] Macro for comparison operators --- include/gtensor/half.h | 39 ++++++++++----------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index 9d76ad2c..54bfd550 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -39,35 +39,16 @@ PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(-); PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(*); PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(/); -TARGET_ARCH bool operator==(const half& lhs, const half& rhs) -{ - return lhs.Get() == rhs.Get(); -} - -TARGET_ARCH bool operator!=(const half& lhs, const half& rhs) -{ - return !(lhs == rhs); -} - -TARGET_ARCH bool operator<(const half& lhs, const half& rhs) -{ - return lhs.Get() < rhs.Get(); -} - -TARGET_ARCH bool operator<=(const half& 
lhs, const half& rhs) -{ - return lhs.Get() <= rhs.Get(); -} - -TARGET_ARCH bool operator>(const half& lhs, const half& rhs) -{ - return lhs.Get() > rhs.Get(); -} - -TARGET_ARCH bool operator>=(const half& lhs, const half& rhs) -{ - return lhs.Get() >= rhs.Get(); -} +#define PROVIDE_HALF_COMPARISON_OPERATOR(op) \ + TARGET_ARCH bool operator op(const half& lhs, const half& rhs) \ + { return lhs.Get() op rhs.Get(); } + +PROVIDE_HALF_COMPARISON_OPERATOR(==); +PROVIDE_HALF_COMPARISON_OPERATOR(!=); +PROVIDE_HALF_COMPARISON_OPERATOR(<); +PROVIDE_HALF_COMPARISON_OPERATOR(<=); +PROVIDE_HALF_COMPARISON_OPERATOR(>); +PROVIDE_HALF_COMPARISON_OPERATOR(>=); std::ostream& operator<<(std::ostream& s, const half& h) { s << (float) h.Get(); return s; } From 4077de47496991ee19dcbebba34bf52c209a9155 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Wed, 23 Aug 2023 13:37:43 +0200 Subject: [PATCH 19/38] [test] Larger custom kernel using +,-,*,/ + Test scalar, implicit/explicit kernel, host/device results match --- tests/test_half.cxx | 61 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/test_half.cxx b/tests/test_half.cxx index 576139dd..4c5f94c1 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -180,3 +180,64 @@ TEST(half, HaxpyImplicit1dDevice) EXPECT_EQ(h_y, ref); } + +template +void generic_explicit_custom_kernel_1d( const gt::half& s1, + const gt::half& s2, + const gt::gtensor& a, + const gt::gtensor& b, + const gt::gtensor& c, + const gt::gtensor& d, + const gt::gtensor& e, + gt::gtensor& result) +{ + auto k_a = a.to_kernel(); + auto k_b = b.to_kernel(); + auto k_c = c.to_kernel(); + auto k_d = d.to_kernel(); + auto k_e = e.to_kernel(); + auto k_r = result.to_kernel(); + + gt::launch<1, S>(result.shape(), GT_LAMBDA(int i) + { k_r(i) = s2 - k_e(i) * ((k_a(i) - s1 * k_b(i)) / k_c(i) + k_d(i)); }); +} + +TEST(half, CustomKernelExplicitImplicitHostDevice) +{ + gt::half a_val{12.34}, b_val{2.345}, c_val{0.987}, d_val{0.67}, e_val{3.14}; + gt::half s1{0.1}, s2{4.56}; + + gt::half r = s2 - e_val * ((a_val - s1 * b_val) / c_val + d_val); + + auto shape = gt::shape(3); + + gt::gtensor h_a(shape, a_val); + gt::gtensor h_b(shape, b_val); + gt::gtensor h_c(shape, c_val); + gt::gtensor h_d(shape, d_val); + gt::gtensor h_e(shape, e_val); + gt::gtensor h_r_expl(shape); + gt::gtensor h_r_impl(shape); + + gt::gtensor d_a(shape, a_val); + gt::gtensor d_b(shape, b_val); + gt::gtensor d_c(shape, c_val); + gt::gtensor d_d(shape, d_val); + gt::gtensor d_e(shape, e_val); + gt::gtensor d_r_expl(shape); + gt::gtensor d_r_impl(shape); + + h_r_impl = s2 - h_e * ((h_a - s1 * h_b) / h_c + h_d); + d_r_impl = s2 - d_e * ((d_a - s1 * d_b) / d_c + d_d); + + generic_explicit_custom_kernel_1d(s1, s2, + h_a, h_b, h_c, h_d, h_e, h_r_expl); + + generic_explicit_custom_kernel_1d(s1, s2, + d_a, d_b, d_c, d_d, d_e, d_r_expl); + + EXPECT_EQ(h_r_impl(2), r); + EXPECT_EQ(h_r_impl, h_r_expl); + EXPECT_EQ(h_r_impl, d_r_expl); + EXPECT_EQ(h_r_impl, d_r_impl); +} From 558b9e0fa5fb58898aa2c31cae95169c205923d4 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Wed, 23 Aug 2023 16:00:58 +0200 Subject: [PATCH 20/38] [clean] Replace C-cast by static_cast --- include/gtensor/half.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index 54bfd550..ec3b2664 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -51,7 +51,7 @@ PROVIDE_HALF_COMPARISON_OPERATOR(>); PROVIDE_HALF_COMPARISON_OPERATOR(>=); std::ostream& 
operator<<(std::ostream& s, const half& h) -{ s << (float) h.Get(); return s; } +{ s << static_cast(h.Get()); return s; } } // namespace gt From 796d394ab9955999be88dc5182ee5704d8482637 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Wed, 23 Aug 2023 16:01:43 +0200 Subject: [PATCH 21/38] [clean] Remove const on arithm. return type --- include/gtensor/half.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index ec3b2664..ded36a8a 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -31,7 +31,7 @@ class half }; #define PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(op) \ - TARGET_ARCH const half operator op(const half& lhs, const half& rhs) \ + TARGET_ARCH half operator op(const half& lhs, const half& rhs) \ { return half( lhs.Get() op rhs.Get() ); } PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(+); From 5d74de64f1f1b1af931f771fa9805e37d37db458 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Wed, 23 Aug 2023 16:22:29 +0200 Subject: [PATCH 22/38] [test/clean] Shorten test by comparing device vecs --- tests/test_half.cxx | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/tests/test_half.cxx b/tests/test_half.cxx index 4c5f94c1..7c25eb53 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -91,17 +91,12 @@ TEST(half, AutoInitHost) TEST(half, AutoInitDevice) { gt::half fill_value{1.25}; - gt::gtensor d_a(gt::shape(5), fill_value); - gt::gtensor d_b(d_a.shape()); + gt::gtensor a(gt::shape(5), fill_value); + gt::gtensor b(a.shape()); - generic_fill_1d(d_b, fill_value); + generic_fill_1d(b, fill_value); - gt::gtensor h_a(d_a.shape()); - gt::gtensor h_b(d_b.shape()); - gt::copy(d_a, h_a); - gt::copy(d_b, h_b); - - EXPECT_EQ(h_a, h_b); + EXPECT_EQ(a, b); } void host_explicit_haxpy_1d(const gt::half& a, @@ -141,17 +136,14 @@ void generic_explicit_haxpy_1d( const gt::half& a, TEST(half, HaxpyExplicit1dDevice) { - gt::gtensor d_x(gt::shape(3), 1.5); - gt::gtensor d_y(d_x.shape(), 2.5); + gt::gtensor x(gt::shape(3), 1.5); + gt::gtensor y(x.shape(), 2.5); gt::half a{0.5}; - gt::gtensor ref(d_y.shape(), 3.25); - gt::gtensor h_y(d_y.shape()); + gt::gtensor ref(y.shape(), 3.25); - generic_explicit_haxpy_1d(a, d_x, d_y); + generic_explicit_haxpy_1d(a, x, y); - gt::copy(d_y, h_y); - - EXPECT_EQ(h_y, ref); + EXPECT_EQ(y, ref); } TEST(half, HaxpyImplicit1dHost) @@ -168,17 +160,14 @@ TEST(half, HaxpyImplicit1dHost) TEST(half, HaxpyImplicit1dDevice) { - gt::gtensor d_x(gt::shape(3), 1.5); - gt::gtensor d_y(d_x.shape(), 2.5); + gt::gtensor x(gt::shape(3), 1.5); + gt::gtensor y(x.shape(), 2.5); gt::half a{0.5}; - gt::gtensor ref(d_y.shape(), 3.25); - gt::gtensor h_y(d_y.shape()); + gt::gtensor ref(y.shape(), 3.25); - d_y = a * d_x + d_y; - - gt::copy(d_y, h_y); + y = a * x + y; - EXPECT_EQ(h_y, ref); + EXPECT_EQ(y, ref); } template From 2bf15eb1797e12d4fae4a3b31c92aee1217bb20a Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Thu, 24 Aug 2023 17:00:52 +0200 Subject: [PATCH 23/38] [test/feat] Mix precision gt::half w. 
float/double --- include/gtensor/half.h | 40 ++++++++++++++++++++++++++ tests/test_half.cxx | 64 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index ded36a8a..bd85e6fa 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -34,15 +34,41 @@ class half TARGET_ARCH half operator op(const half& lhs, const half& rhs) \ { return half( lhs.Get() op rhs.Get() ); } +#define PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(op, fp_type) \ + \ + TARGET_ARCH fp_type operator op(const half& lhs, const fp_type& rhs) \ + { return static_cast(lhs.Get()) op rhs; } \ + \ + TARGET_ARCH fp_type operator op(const fp_type& lhs, const half& rhs) \ + { return lhs op static_cast(rhs.Get()); } + PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(+); PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(-); PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(*); PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(/); +PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(+, float); +PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(-, float); +PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(*, float); +PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(/, float); + +PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(+, double); +PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(-, double); +PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(*, double); +PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(/, double); + #define PROVIDE_HALF_COMPARISON_OPERATOR(op) \ TARGET_ARCH bool operator op(const half& lhs, const half& rhs) \ { return lhs.Get() op rhs.Get(); } +#define PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(op, fp_type) \ + \ + TARGET_ARCH bool operator op(const half& lhs, const fp_type& rhs) \ + { return static_cast(lhs.Get()) op rhs; } \ + \ + TARGET_ARCH bool operator op(const fp_type& lhs, const half& rhs) \ + { return lhs op static_cast(rhs.Get()); } + PROVIDE_HALF_COMPARISON_OPERATOR(==); PROVIDE_HALF_COMPARISON_OPERATOR(!=); PROVIDE_HALF_COMPARISON_OPERATOR(<); @@ -50,6 +76,20 @@ PROVIDE_HALF_COMPARISON_OPERATOR(<=); PROVIDE_HALF_COMPARISON_OPERATOR(>); PROVIDE_HALF_COMPARISON_OPERATOR(>=); +PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(==, float); +PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(!=, float); +PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(<, float); +PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(<=, float); +PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(>, float); +PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(>=, float); + +PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(==, double); +PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(!=, double); +PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(<, double); +PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(<=, double); +PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(>, double); +PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(>=, double); + std::ostream& operator<<(std::ostream& s, const half& h) { s << static_cast(h.Get()); return s; } diff --git a/tests/test_half.cxx b/tests/test_half.cxx index 7c25eb53..21d41923 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -230,3 +230,67 @@ TEST(half, CustomKernelExplicitImplicitHostDevice) EXPECT_EQ(h_r_impl, d_r_expl); EXPECT_EQ(h_r_impl, d_r_impl); } + +TEST(half, MixedPrecisionScalar) +{ + gt::half a_half{1.0}; + + gt::half b_half{2.0}; + float b_float{2.0}; + double b_double{2.0}; + + auto c_half = a_half + b_half; + auto c_float = a_half + b_float; + auto c_double = a_half + b_double; + + EXPECT_TRUE((std::is_same::value)); + EXPECT_TRUE((std::is_same::value)); + EXPECT_TRUE((std::is_same::value)); + + EXPECT_EQ(c_half, c_float); + EXPECT_EQ(c_half, c_double); +} + 
+TEST(half, MixedPrecisionHost) +{ + auto shape = gt::shape(3); + gt::gtensor vh(shape, 4.0); + gt::gtensor vf(shape, 3.0); + gt::gtensor vd(shape, 2.0); + + gt::gtensor rh(shape); + gt::gtensor rf(shape); + gt::gtensor rd(shape); + + gt::gtensor ref(shape, 10.0); + + rh = (vh * vf) - (vh / vd); + rf = (vh * vf) - (vh / vd); + rd = (vh * vf) - (vh / vd); + + EXPECT_EQ(ref, rh); + EXPECT_EQ(ref, rf); + EXPECT_EQ(ref, rd); +} + +TEST(half, MixedPrecisionDevice) +{ + auto shape = gt::shape(3); + gt::gtensor vh(shape, 4.0); + gt::gtensor vf(shape, 3.0); + gt::gtensor vd(shape, 2.0); + + gt::gtensor rh(shape); + gt::gtensor rf(shape); + gt::gtensor rd(shape); + + gt::gtensor ref(shape, 10.0); + + rh = (vh * vf) - (vh / vd); + rf = (vh * vf) - (vh / vd); + rd = (vh * vf) - (vh / vd); + + EXPECT_EQ(ref, rh); + EXPECT_EQ(ref, rf); + EXPECT_EQ(ref, rd); +} From 20ebf2ff082044dad355b655e070cf3520938ef1 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Thu, 24 Aug 2023 17:06:32 +0200 Subject: [PATCH 24/38] [fix] #undef macros in half.h --- include/gtensor/half.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index bd85e6fa..6ae3212c 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -93,6 +93,12 @@ PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(>=, double); std::ostream& operator<<(std::ostream& s, const half& h) { s << static_cast(h.Get()); return s; } +#undef TARGET_ARCH +#undef PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR +#undef PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR +#undef PROVIDE_HALF_COMPARISON_OPERATOR +#undef PROVIDE_MIXED_HALF_COMPARISON_OPERATOR + } // namespace gt #endif From e887e9c56e22664456fecef0a14d308ac329bc94 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Fri, 25 Aug 2023 10:18:20 +0200 Subject: [PATCH 25/38] [fix] Build cuda for compute capability < 5.3 --- include/gtensor/half.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index 6ae3212c..dd3e6a7b 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -9,11 +9,15 @@ namespace gt // ====================================================================== // half -#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) +#ifdef __CUDA_ARCH__ #define TARGET_ARCH __host__ __device__ -using compute_type = __half; #else #define TARGET_ARCH +#endif + +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) +using compute_type = __half; +#else using compute_type = float; #endif From 7a05b882d2b2160983f58bdada25040fb6a15b53 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Mon, 28 Aug 2023 13:56:37 +0200 Subject: [PATCH 26/38] [fix] Inline gt::half methods/ops via GT_INLINE --- include/gtensor/half.h | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index dd3e6a7b..f84e9858 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -3,17 +3,13 @@ #include #include +#include namespace gt { // ====================================================================== // half -#ifdef __CUDA_ARCH__ -#define TARGET_ARCH __host__ __device__ -#else -#define TARGET_ARCH -#endif #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) using compute_type = __half; @@ -25,25 +21,25 @@ class half { public: half() = default; - TARGET_ARCH half(float x) : x(x) {}; - TARGET_ARCH half(__half x) : x(x) {}; + GT_INLINE half(float x) : x(x) {}; + GT_INLINE half(__half x) : x(x) {}; - TARGET_ARCH const half& operator=(const float 
f) { x = f; return *this; } - TARGET_ARCH compute_type Get() const { return static_cast(x); } + GT_INLINE const half& operator=(const float f) { x = f; return *this; } + GT_INLINE compute_type Get() const { return static_cast(x); } private: __half x; }; #define PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(op) \ - TARGET_ARCH half operator op(const half& lhs, const half& rhs) \ + GT_INLINE half operator op(const half& lhs, const half& rhs) \ { return half( lhs.Get() op rhs.Get() ); } #define PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(op, fp_type) \ \ - TARGET_ARCH fp_type operator op(const half& lhs, const fp_type& rhs) \ + GT_INLINE fp_type operator op(const half& lhs, const fp_type& rhs) \ { return static_cast(lhs.Get()) op rhs; } \ \ - TARGET_ARCH fp_type operator op(const fp_type& lhs, const half& rhs) \ + GT_INLINE fp_type operator op(const fp_type& lhs, const half& rhs) \ { return lhs op static_cast(rhs.Get()); } PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(+); @@ -62,15 +58,15 @@ PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(*, double); PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(/, double); #define PROVIDE_HALF_COMPARISON_OPERATOR(op) \ - TARGET_ARCH bool operator op(const half& lhs, const half& rhs) \ + GT_INLINE bool operator op(const half& lhs, const half& rhs) \ { return lhs.Get() op rhs.Get(); } #define PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(op, fp_type) \ \ - TARGET_ARCH bool operator op(const half& lhs, const fp_type& rhs) \ + GT_INLINE bool operator op(const half& lhs, const fp_type& rhs) \ { return static_cast(lhs.Get()) op rhs; } \ \ - TARGET_ARCH bool operator op(const fp_type& lhs, const half& rhs) \ + GT_INLINE bool operator op(const fp_type& lhs, const half& rhs) \ { return lhs op static_cast(rhs.Get()); } PROVIDE_HALF_COMPARISON_OPERATOR(==); @@ -97,7 +93,6 @@ PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(>=, double); std::ostream& operator<<(std::ostream& s, const half& h) { s << static_cast(h.Get()); return s; } -#undef TARGET_ARCH #undef PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR #undef PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR #undef PROVIDE_HALF_COMPARISON_OPERATOR From 7572edc7ed2fffad74071a6e589fa9920f81fba0 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Mon, 28 Aug 2023 14:05:33 +0200 Subject: [PATCH 27/38] [test/clean] Use snake_case for test names --- tests/test_half.cxx | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/test_half.cxx b/tests/test_half.cxx index 21d41923..3689ac46 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -4,7 +4,7 @@ #include -TEST(half, ScalarArithmetic) +TEST(half, scalar_arithmetic) { gt::half a{1.0}; gt::half b{2.0}; @@ -29,7 +29,7 @@ TEST(half, ScalarArithmetic) EXPECT_EQ(c, ref); } -TEST(half, BinaryComparisonOperators) +TEST(half, binary_comparison_operators) { gt::half a{1.0}; gt::half b{2.0}; @@ -77,7 +77,7 @@ void generic_fill_1d(gt::gtensor& x, const gt::half& fill_value) gt::launch<1, S>(x.shape(), GT_LAMBDA(int i) { k_x(i) = fill_value; }); } -TEST(half, AutoInitHost) +TEST(half, auto_init_host) { gt::half fill_value{1.25}; gt::gtensor a(gt::shape(5), fill_value); @@ -88,7 +88,7 @@ TEST(half, AutoInitHost) EXPECT_EQ(a, b); } -TEST(half, AutoInitDevice) +TEST(half, auto_init_device) { gt::half fill_value{1.25}; gt::gtensor a(gt::shape(5), fill_value); @@ -110,7 +110,7 @@ void host_explicit_haxpy_1d(const gt::half& a, y.shape(), GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); } -TEST(half, HaxpyExplicit1dHost) +TEST(half, haxpy_explicit_1d_host) { gt::gtensor 
x(gt::shape(3), 1.5); gt::gtensor y(x.shape(), 2.5); @@ -134,7 +134,7 @@ void generic_explicit_haxpy_1d( const gt::half& a, y.shape(), GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); } -TEST(half, HaxpyExplicit1dDevice) +TEST(half, haxpy_explicit_1d_device) { gt::gtensor x(gt::shape(3), 1.5); gt::gtensor y(x.shape(), 2.5); @@ -146,7 +146,7 @@ TEST(half, HaxpyExplicit1dDevice) EXPECT_EQ(y, ref); } -TEST(half, HaxpyImplicit1dHost) +TEST(half, haxpy_implicit_1d_host) { gt::gtensor x(gt::shape(3), 1.5); gt::gtensor y(x.shape(), 2.5); @@ -158,7 +158,7 @@ TEST(half, HaxpyImplicit1dHost) EXPECT_EQ(y, ref); } -TEST(half, HaxpyImplicit1dDevice) +TEST(half, haxpy_implicit_1d_device) { gt::gtensor x(gt::shape(3), 1.5); gt::gtensor y(x.shape(), 2.5); @@ -191,7 +191,7 @@ void generic_explicit_custom_kernel_1d( const gt::half& s1, { k_r(i) = s2 - k_e(i) * ((k_a(i) - s1 * k_b(i)) / k_c(i) + k_d(i)); }); } -TEST(half, CustomKernelExplicitImplicitHostDevice) +TEST(half, custom_kernel_explicit_implicit_host_device) { gt::half a_val{12.34}, b_val{2.345}, c_val{0.987}, d_val{0.67}, e_val{3.14}; gt::half s1{0.1}, s2{4.56}; @@ -231,7 +231,7 @@ TEST(half, CustomKernelExplicitImplicitHostDevice) EXPECT_EQ(h_r_impl, d_r_impl); } -TEST(half, MixedPrecisionScalar) +TEST(half, mixed_precision_scalar) { gt::half a_half{1.0}; @@ -251,7 +251,7 @@ TEST(half, MixedPrecisionScalar) EXPECT_EQ(c_half, c_double); } -TEST(half, MixedPrecisionHost) +TEST(half, mixed_precision_host) { auto shape = gt::shape(3); gt::gtensor vh(shape, 4.0); @@ -273,7 +273,7 @@ TEST(half, MixedPrecisionHost) EXPECT_EQ(ref, rd); } -TEST(half, MixedPrecisionDevice) +TEST(half, mixed_precision_device) { auto shape = gt::shape(3); gt::gtensor vh(shape, 4.0); From e3bb3be6e01102af65719dd72773b51da58284d3 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Mon, 28 Aug 2023 14:08:02 +0200 Subject: [PATCH 28/38] [clean] Rename 1d by 1D in tests --- tests/test_half.cxx | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/test_half.cxx b/tests/test_half.cxx index 3689ac46..0f97ced8 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -70,7 +70,7 @@ TEST(half, binary_comparison_operators) } template -void generic_fill_1d(gt::gtensor& x, const gt::half& fill_value) +void generic_fill_1D(gt::gtensor& x, const gt::half& fill_value) { auto k_x = x.to_kernel(); @@ -83,7 +83,7 @@ TEST(half, auto_init_host) gt::gtensor a(gt::shape(5), fill_value); gt::gtensor b(a.shape()); - generic_fill_1d(b, fill_value); + generic_fill_1D(b, fill_value); EXPECT_EQ(a, b); } @@ -94,12 +94,12 @@ TEST(half, auto_init_device) gt::gtensor a(gt::shape(5), fill_value); gt::gtensor b(a.shape()); - generic_fill_1d(b, fill_value); + generic_fill_1D(b, fill_value); EXPECT_EQ(a, b); } -void host_explicit_haxpy_1d(const gt::half& a, +void host_explicit_haxpy_1D(const gt::half& a, const gt::gtensor& x, gt::gtensor& y) { @@ -110,20 +110,20 @@ void host_explicit_haxpy_1d(const gt::half& a, y.shape(), GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); } -TEST(half, haxpy_explicit_1d_host) +TEST(half, haxpy_explicit_1D_host) { gt::gtensor x(gt::shape(3), 1.5); gt::gtensor y(x.shape(), 2.5); gt::half a{0.5}; gt::gtensor ref(x.shape(), 3.25); - host_explicit_haxpy_1d(a, x, y); + host_explicit_haxpy_1D(a, x, y); EXPECT_EQ(y, ref); } template -void generic_explicit_haxpy_1d( const gt::half& a, +void generic_explicit_haxpy_1D( const gt::half& a, const gt::gtensor& x, gt::gtensor& y) { @@ -134,19 +134,19 @@ void generic_explicit_haxpy_1d( const 
gt::half& a, y.shape(), GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); } -TEST(half, haxpy_explicit_1d_device) +TEST(half, haxpy_explicit_1D_device) { gt::gtensor x(gt::shape(3), 1.5); gt::gtensor y(x.shape(), 2.5); gt::half a{0.5}; gt::gtensor ref(y.shape(), 3.25); - generic_explicit_haxpy_1d(a, x, y); + generic_explicit_haxpy_1D(a, x, y); EXPECT_EQ(y, ref); } -TEST(half, haxpy_implicit_1d_host) +TEST(half, haxpy_implicit_1D_host) { gt::gtensor x(gt::shape(3), 1.5); gt::gtensor y(x.shape(), 2.5); @@ -158,7 +158,7 @@ TEST(half, haxpy_implicit_1d_host) EXPECT_EQ(y, ref); } -TEST(half, haxpy_implicit_1d_device) +TEST(half, haxpy_implicit_1D_device) { gt::gtensor x(gt::shape(3), 1.5); gt::gtensor y(x.shape(), 2.5); @@ -171,7 +171,7 @@ TEST(half, haxpy_implicit_1d_device) } template -void generic_explicit_custom_kernel_1d( const gt::half& s1, +void generic_explicit_custom_kernel_1D( const gt::half& s1, const gt::half& s2, const gt::gtensor& a, const gt::gtensor& b, @@ -219,10 +219,10 @@ TEST(half, custom_kernel_explicit_implicit_host_device) h_r_impl = s2 - h_e * ((h_a - s1 * h_b) / h_c + h_d); d_r_impl = s2 - d_e * ((d_a - s1 * d_b) / d_c + d_d); - generic_explicit_custom_kernel_1d(s1, s2, + generic_explicit_custom_kernel_1D(s1, s2, h_a, h_b, h_c, h_d, h_e, h_r_expl); - generic_explicit_custom_kernel_1d(s1, s2, + generic_explicit_custom_kernel_1D(s1, s2, d_a, d_b, d_c, d_d, d_e, d_r_expl); EXPECT_EQ(h_r_impl(2), r); From 996f052a247210e8a5dbe42a8c8aa5a531dbec40 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Mon, 28 Aug 2023 14:16:43 +0200 Subject: [PATCH 29/38] [clean] Helper function to reduce code duplication --- tests/test_half.cxx | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/tests/test_half.cxx b/tests/test_half.cxx index 0f97ced8..2b0e7928 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -251,18 +251,19 @@ TEST(half, mixed_precision_scalar) EXPECT_EQ(c_half, c_double); } -TEST(half, mixed_precision_host) +template +void test_mixed_precision_helper() { auto shape = gt::shape(3); - gt::gtensor vh(shape, 4.0); - gt::gtensor vf(shape, 3.0); - gt::gtensor vd(shape, 2.0); + gt::gtensor vh(shape, 4.0); + gt::gtensor vf(shape, 3.0); + gt::gtensor vd(shape, 2.0); - gt::gtensor rh(shape); - gt::gtensor rf(shape); - gt::gtensor rd(shape); + gt::gtensor rh(shape); + gt::gtensor rf(shape); + gt::gtensor rd(shape); - gt::gtensor ref(shape, 10.0); + gt::gtensor ref(shape, 10.0); rh = (vh * vf) - (vh / vd); rf = (vh * vf) - (vh / vd); @@ -273,24 +274,12 @@ TEST(half, mixed_precision_host) EXPECT_EQ(ref, rd); } -TEST(half, mixed_precision_device) +TEST(half, mixed_precision_host) { - auto shape = gt::shape(3); - gt::gtensor vh(shape, 4.0); - gt::gtensor vf(shape, 3.0); - gt::gtensor vd(shape, 2.0); - - gt::gtensor rh(shape); - gt::gtensor rf(shape); - gt::gtensor rd(shape); - - gt::gtensor ref(shape, 10.0); - - rh = (vh * vf) - (vh / vd); - rf = (vh * vf) - (vh / vd); - rd = (vh * vf) - (vh / vd); + test_mixed_precision_helper(); +} - EXPECT_EQ(ref, rh); - EXPECT_EQ(ref, rf); - EXPECT_EQ(ref, rd); +TEST(half, mixed_precision_device) +{ + test_mixed_precision_helper(); } From 37bbab0151456dae3b2d7e4a33a98e614de175df Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Mon, 28 Aug 2023 14:30:42 +0200 Subject: [PATCH 30/38] [clean] Remove host-helper: covered by generic one --- tests/test_half.cxx | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/tests/test_half.cxx b/tests/test_half.cxx index 
2b0e7928..9705c381 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -99,14 +99,15 @@ TEST(half, auto_init_device) EXPECT_EQ(a, b); } -void host_explicit_haxpy_1D(const gt::half& a, - const gt::gtensor& x, - gt::gtensor& y) +template +void generic_explicit_haxpy_1D( const gt::half& a, + const gt::gtensor& x, + gt::gtensor& y) { auto k_x = x.to_kernel(); auto k_y = y.to_kernel(); - gt::launch_host<1>( + gt::launch<1, S>( y.shape(), GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); } @@ -117,23 +118,11 @@ TEST(half, haxpy_explicit_1D_host) gt::half a{0.5}; gt::gtensor ref(x.shape(), 3.25); - host_explicit_haxpy_1D(a, x, y); + generic_explicit_haxpy_1D(a, x, y); EXPECT_EQ(y, ref); } -template -void generic_explicit_haxpy_1D( const gt::half& a, - const gt::gtensor& x, - gt::gtensor& y) -{ - auto k_x = x.to_kernel(); - auto k_y = y.to_kernel(); - - gt::launch<1, S>( - y.shape(), GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); -} - TEST(half, haxpy_explicit_1D_device) { gt::gtensor x(gt::shape(3), 1.5); From ad4968f1dcbdd7c361854b8fcc1fd05d3ed69817 Mon Sep 17 00:00:00 2001 From: Carl-Martin Pfeiler Date: Tue, 29 Aug 2023 15:28:10 +0200 Subject: [PATCH 31/38] [clean] Undef helper macros at end of file --- include/gtensor/half.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index f84e9858..ef49ad21 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -93,11 +93,11 @@ PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(>=, double); std::ostream& operator<<(std::ostream& s, const half& h) { s << static_cast(h.Get()); return s; } +} // namespace gt + #undef PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR #undef PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR #undef PROVIDE_HALF_COMPARISON_OPERATOR #undef PROVIDE_MIXED_HALF_COMPARISON_OPERATOR -} // namespace gt - #endif From dffc73076a7da1c28a1e86160daa84faa4d9cddf Mon Sep 17 00:00:00 2001 From: Carl-Martin Pfeiler Date: Tue, 29 Aug 2023 18:08:36 +0200 Subject: [PATCH 32/38] [fix] Keep cuda_fp16.h out of default incl. chain + TODO Alternative gt::half storage_type (e.g., _Float16) --- include/gtensor/half.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index ef49ad21..a9dc4b4f 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -2,7 +2,15 @@ #define GTENSOR_HALF_H #include + +#if __has_include() #include +#define GTENSOR_FP16_CUDA_HEADER +#elif 0 // TODO check if other fp16 type available, e.g., _Float16 +#else +#error "No half precision floating point type available." +#endif + #include namespace gt @@ -11,7 +19,15 @@ namespace gt // ====================================================================== // half -#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530) +#if defined(GTENSOR_FP16_CUDA_HEADER) +using storage_type = __half; +#else +#error "No half precision floating point type available." 
+#endif + +#if defined(GTENSOR_FP16_CUDA_HEADER) \ + && defined(__CUDA_ARCH__) \ + && (__CUDA_ARCH__ >= 530) using compute_type = __half; #else using compute_type = float; @@ -22,12 +38,12 @@ class half public: half() = default; GT_INLINE half(float x) : x(x) {}; - GT_INLINE half(__half x) : x(x) {}; + GT_INLINE half(storage_type x) : x(x) {}; GT_INLINE const half& operator=(const float f) { x = f; return *this; } GT_INLINE compute_type Get() const { return static_cast(x); } private: - __half x; + storage_type x; }; #define PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(op) \ From 0e79cd026bce46c99583d661effa1c9a0fc1a058 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Wed, 30 Aug 2023 12:41:59 +0200 Subject: [PATCH 33/38] [build] Added cmake option GTENSOR_ENABLE_FP16 --- CMakeLists.txt | 8 ++++++++ include/gtensor/gtensor.h | 4 ++++ include/gtensor/half.h | 4 ++-- tests/CMakeLists.txt | 5 ++++- 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b58305a..ca21e8cb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,7 @@ option(GTENSOR_ALLOCATOR_CACHING "Enable naive caching allocators" ON) option(GTENSOR_BOUNDS_CHECK "Enable per access bounds checking" OFF) option(GTENSOR_ADDRESS_CHECK "Enable address checking for device spans" OFF) option(GTENSOR_SYNC_KERNELS "Enable host sync after assign and launch kernels" OFF) +option(GTENSOR_ENABLE_FP16 "Enable 16-bit floating point type gt::half" OFF) if (GTENSOR_ENABLE_FORTRAN) # do this early (here) since later the `enable_language(Fortran)` gives me trouble @@ -335,6 +336,13 @@ else() message(STATUS "${PROJECT_NAME}: sync kernels is OFF") endif() +if (GTENSOR_ENABLE_FP16) + message(STATUS "${PROJECT_NAME}: gt::half is ENABLED") + target_compile_definitions(gtensor_${GTENSOR_DEVICE} + INTERFACE GTENSOR_ENABLE_FP16) +endif() + + target_compile_definitions(gtensor_${GTENSOR_DEVICE} INTERFACE GTENSOR_MANAGED_MEMORY_TYPE_DEFAULT=${GTENSOR_MANAGED_MEMORY_TYPE_DEFAULT}) message(STATUS "${PROJECT_NAME}: default managed memory type '${GTENSOR_MANAGED_MEMORY_TYPE_DEFAULT}'") diff --git a/include/gtensor/gtensor.h b/include/gtensor/gtensor.h index 507f0d20..70dee7d3 100644 --- a/include/gtensor/gtensor.h +++ b/include/gtensor/gtensor.h @@ -19,6 +19,10 @@ #include "operator.h" #include "space.h" +#if defined(GTENSOR_ENABLE_FP16) +#include "half.h" +#endif + namespace gt { diff --git a/include/gtensor/half.h b/include/gtensor/half.h index a9dc4b4f..6b29c1fe 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -8,7 +8,7 @@ #define GTENSOR_FP16_CUDA_HEADER #elif 0 // TODO check if other fp16 type available, e.g., _Float16 #else -#error "No half precision floating point type available." +#error "GTENSOR_ENABLE_FP16=ON, but no 16-bit FP type available!" #endif #include @@ -22,7 +22,7 @@ namespace gt #if defined(GTENSOR_FP16_CUDA_HEADER) using storage_type = __half; #else -#error "No half precision floating point type available." +#error "GTENSOR_ENABLE_FP16=ON, but no 16-bit FP type available!" 
#endif #if defined(GTENSOR_FP16_CUDA_HEADER) \ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 90538c02..41a4f10d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -43,7 +43,6 @@ add_gtensor_test(test_space) add_gtensor_test(test_stream) add_gtensor_test(test_gtest_predicates) add_gtensor_test(test_sparse) -add_gtensor_test(test_half) if (GTENSOR_ENABLE_CLIB) add_executable(test_clib) @@ -81,3 +80,7 @@ if (GTENSOR_ENABLE_FFT) add_gtensor_test(test_fft) target_link_libraries(test_fft gtfft) endif() + +if (GTENSOR_ENABLE_FP16) + add_gtensor_test(test_half) +endif() From 1708453e68a30dfc3a45878b33bcdca5cdc22b03 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Thu, 7 Sep 2023 14:11:39 +0200 Subject: [PATCH 34/38] [rename] Change gt::half to gt::float16_t --- include/gtensor/half.h | 114 +++++++++++++-------------- tests/test_half.cxx | 170 ++++++++++++++++++++--------------------- 2 files changed, 142 insertions(+), 142 deletions(-) diff --git a/include/gtensor/half.h b/include/gtensor/half.h index 6b29c1fe..5e21701f 100644 --- a/include/gtensor/half.h +++ b/include/gtensor/half.h @@ -1,5 +1,5 @@ -#ifndef GTENSOR_HALF_H -#define GTENSOR_HALF_H +#ifndef GTENSOR_FLOAT16T_H +#define GTENSOR_FLOAT16T_H #include @@ -17,7 +17,7 @@ namespace gt { // ====================================================================== -// half +// float16_t #if defined(GTENSOR_FP16_CUDA_HEADER) using storage_type = __half; @@ -33,87 +33,87 @@ using compute_type = __half; using compute_type = float; #endif -class half +class float16_t { public: - half() = default; - GT_INLINE half(float x) : x(x) {}; - GT_INLINE half(storage_type x) : x(x) {}; + float16_t() = default; + GT_INLINE float16_t(float x) : x(x) {}; + GT_INLINE float16_t(storage_type x) : x(x) {}; - GT_INLINE const half& operator=(const float f) { x = f; return *this; } + GT_INLINE const float16_t& operator=(const float f) { x = f; return *this; } GT_INLINE compute_type Get() const { return static_cast(x); } private: storage_type x; }; -#define PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(op) \ - GT_INLINE half operator op(const half& lhs, const half& rhs) \ - { return half( lhs.Get() op rhs.Get() ); } +#define PROVIDE_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(op) \ + GT_INLINE float16_t operator op(const float16_t& lhs, const float16_t& rhs) \ + { return float16_t( lhs.Get() op rhs.Get() ); } -#define PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(op, fp_type) \ +#define PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(op, fp_type) \ \ - GT_INLINE fp_type operator op(const half& lhs, const fp_type& rhs) \ + GT_INLINE fp_type operator op(const float16_t& lhs, const fp_type& rhs) \ { return static_cast(lhs.Get()) op rhs; } \ \ - GT_INLINE fp_type operator op(const fp_type& lhs, const half& rhs) \ + GT_INLINE fp_type operator op(const fp_type& lhs, const float16_t& rhs) \ { return lhs op static_cast(rhs.Get()); } -PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(+); -PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(-); -PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(*); -PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR(/); +PROVIDE_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(+); +PROVIDE_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(-); +PROVIDE_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(*); +PROVIDE_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(/); -PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(+, float); -PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(-, float); -PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(*, float); -PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(/, float); 
+PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(+, float); +PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(-, float); +PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(*, float); +PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(/, float); -PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(+, double); -PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(-, double); -PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(*, double); -PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR(/, double); +PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(+, double); +PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(-, double); +PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(*, double); +PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(/, double); -#define PROVIDE_HALF_COMPARISON_OPERATOR(op) \ - GT_INLINE bool operator op(const half& lhs, const half& rhs) \ +#define PROVIDE_FLOAT16T_COMPARISON_OPERATOR(op) \ + GT_INLINE bool operator op(const float16_t& lhs, const float16_t& rhs) \ { return lhs.Get() op rhs.Get(); } -#define PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(op, fp_type) \ +#define PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(op, fp_type) \ \ - GT_INLINE bool operator op(const half& lhs, const fp_type& rhs) \ + GT_INLINE bool operator op(const float16_t& lhs, const fp_type& rhs) \ { return static_cast(lhs.Get()) op rhs; } \ \ - GT_INLINE bool operator op(const fp_type& lhs, const half& rhs) \ + GT_INLINE bool operator op(const fp_type& lhs, const float16_t& rhs) \ { return lhs op static_cast(rhs.Get()); } -PROVIDE_HALF_COMPARISON_OPERATOR(==); -PROVIDE_HALF_COMPARISON_OPERATOR(!=); -PROVIDE_HALF_COMPARISON_OPERATOR(<); -PROVIDE_HALF_COMPARISON_OPERATOR(<=); -PROVIDE_HALF_COMPARISON_OPERATOR(>); -PROVIDE_HALF_COMPARISON_OPERATOR(>=); - -PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(==, float); -PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(!=, float); -PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(<, float); -PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(<=, float); -PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(>, float); -PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(>=, float); - -PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(==, double); -PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(!=, double); -PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(<, double); -PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(<=, double); -PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(>, double); -PROVIDE_MIXED_HALF_COMPARISON_OPERATOR(>=, double); - -std::ostream& operator<<(std::ostream& s, const half& h) +PROVIDE_FLOAT16T_COMPARISON_OPERATOR(==); +PROVIDE_FLOAT16T_COMPARISON_OPERATOR(!=); +PROVIDE_FLOAT16T_COMPARISON_OPERATOR(<); +PROVIDE_FLOAT16T_COMPARISON_OPERATOR(<=); +PROVIDE_FLOAT16T_COMPARISON_OPERATOR(>); +PROVIDE_FLOAT16T_COMPARISON_OPERATOR(>=); + +PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(==, float); +PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(!=, float); +PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(<, float); +PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(<=, float); +PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(>, float); +PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(>=, float); + +PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(==, double); +PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(!=, double); +PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(<, double); +PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(<=, double); +PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(>, double); +PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(>=, double); + +std::ostream& operator<<(std::ostream& s, const float16_t& h) { s << static_cast(h.Get()); return s; } } // namespace gt -#undef PROVIDE_HALF_BINARY_ARITHMETIC_OPERATOR -#undef 
PROVIDE_MIXED_HALF_BINARY_ARITHMETIC_OPERATOR -#undef PROVIDE_HALF_COMPARISON_OPERATOR -#undef PROVIDE_MIXED_HALF_COMPARISON_OPERATOR +#undef PROVIDE_FLOAT16T_BINARY_ARITHMETIC_OPERATOR +#undef PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR +#undef PROVIDE_FLOAT16T_COMPARISON_OPERATOR +#undef PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR #endif diff --git a/tests/test_half.cxx b/tests/test_half.cxx index 9705c381..4cc8f5b7 100644 --- a/tests/test_half.cxx +++ b/tests/test_half.cxx @@ -4,13 +4,13 @@ #include -TEST(half, scalar_arithmetic) +TEST(float16_t, scalar_arithmetic) { - gt::half a{1.0}; - gt::half b{2.0}; + gt::float16_t a{1.0}; + gt::float16_t b{2.0}; - gt::half c{0.0}; - gt::half ref{0.0}; + gt::float16_t c{0.0}; + gt::float16_t ref{0.0}; c = a + b; ref = 3.0; @@ -29,11 +29,11 @@ TEST(half, scalar_arithmetic) EXPECT_EQ(c, ref); } -TEST(half, binary_comparison_operators) +TEST(float16_t, binary_comparison_operators) { - gt::half a{1.0}; - gt::half b{2.0}; - gt::half c{2.0}; + gt::float16_t a{1.0}; + gt::float16_t b{2.0}; + gt::float16_t c{2.0}; EXPECT_EQ(a, a); EXPECT_EQ(b, b); @@ -70,29 +70,29 @@ TEST(half, binary_comparison_operators) } template -void generic_fill_1D(gt::gtensor& x, const gt::half& fill_value) +void generic_fill_1D(gt::gtensor& x, const gt::float16_t& fill_value) { auto k_x = x.to_kernel(); gt::launch<1, S>(x.shape(), GT_LAMBDA(int i) { k_x(i) = fill_value; }); } -TEST(half, auto_init_host) +TEST(float16_t, auto_init_host) { - gt::half fill_value{1.25}; - gt::gtensor a(gt::shape(5), fill_value); - gt::gtensor b(a.shape()); + gt::float16_t fill_value{1.25}; + gt::gtensor a(gt::shape(5), fill_value); + gt::gtensor b(a.shape()); generic_fill_1D(b, fill_value); EXPECT_EQ(a, b); } -TEST(half, auto_init_device) +TEST(float16_t, auto_init_device) { - gt::half fill_value{1.25}; - gt::gtensor a(gt::shape(5), fill_value); - gt::gtensor b(a.shape()); + gt::float16_t fill_value{1.25}; + gt::gtensor a(gt::shape(5), fill_value); + gt::gtensor b(a.shape()); generic_fill_1D(b, fill_value); @@ -100,9 +100,9 @@ TEST(half, auto_init_device) } template -void generic_explicit_haxpy_1D( const gt::half& a, - const gt::gtensor& x, - gt::gtensor& y) +void generic_explicit_haxpy_1D( const gt::float16_t& a, + const gt::gtensor& x, + gt::gtensor& y) { auto k_x = x.to_kernel(); auto k_y = y.to_kernel(); @@ -111,48 +111,48 @@ void generic_explicit_haxpy_1D( const gt::half& a, y.shape(), GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); } -TEST(half, haxpy_explicit_1D_host) +TEST(float16_t, haxpy_explicit_1D_host) { - gt::gtensor x(gt::shape(3), 1.5); - gt::gtensor y(x.shape(), 2.5); - gt::half a{0.5}; - gt::gtensor ref(x.shape(), 3.25); + gt::gtensor x(gt::shape(3), 1.5); + gt::gtensor y(x.shape(), 2.5); + gt::float16_t a{0.5}; + gt::gtensor ref(x.shape(), 3.25); generic_explicit_haxpy_1D(a, x, y); EXPECT_EQ(y, ref); } -TEST(half, haxpy_explicit_1D_device) +TEST(float16_t, haxpy_explicit_1D_device) { - gt::gtensor x(gt::shape(3), 1.5); - gt::gtensor y(x.shape(), 2.5); - gt::half a{0.5}; - gt::gtensor ref(y.shape(), 3.25); + gt::gtensor x(gt::shape(3), 1.5); + gt::gtensor y(x.shape(), 2.5); + gt::float16_t a{0.5}; + gt::gtensor ref(y.shape(), 3.25); generic_explicit_haxpy_1D(a, x, y); EXPECT_EQ(y, ref); } -TEST(half, haxpy_implicit_1D_host) +TEST(float16_t, haxpy_implicit_1D_host) { - gt::gtensor x(gt::shape(3), 1.5); - gt::gtensor y(x.shape(), 2.5); - gt::half a{0.5}; - gt::gtensor ref(x.shape(), 3.25); + gt::gtensor x(gt::shape(3), 1.5); + gt::gtensor y(x.shape(), 2.5); + gt::float16_t 
a{0.5}; + gt::gtensor ref(x.shape(), 3.25); y = a * x + y; EXPECT_EQ(y, ref); } -TEST(half, haxpy_implicit_1D_device) +TEST(float16_t, haxpy_implicit_1D_device) { - gt::gtensor x(gt::shape(3), 1.5); - gt::gtensor y(x.shape(), 2.5); - gt::half a{0.5}; - gt::gtensor ref(y.shape(), 3.25); + gt::gtensor x(gt::shape(3), 1.5); + gt::gtensor y(x.shape(), 2.5); + gt::float16_t a{0.5}; + gt::gtensor ref(y.shape(), 3.25); y = a * x + y; @@ -160,14 +160,14 @@ TEST(half, haxpy_implicit_1D_device) } template -void generic_explicit_custom_kernel_1D( const gt::half& s1, - const gt::half& s2, - const gt::gtensor& a, - const gt::gtensor& b, - const gt::gtensor& c, - const gt::gtensor& d, - const gt::gtensor& e, - gt::gtensor& result) +void generic_explicit_custom_kernel_1D( const gt::float16_t& s1, + const gt::float16_t& s2, + const gt::gtensor& a, + const gt::gtensor& b, + const gt::gtensor& c, + const gt::gtensor& d, + const gt::gtensor& e, + gt::gtensor& result) { auto k_a = a.to_kernel(); auto k_b = b.to_kernel(); @@ -180,30 +180,30 @@ void generic_explicit_custom_kernel_1D( const gt::half& s1, { k_r(i) = s2 - k_e(i) * ((k_a(i) - s1 * k_b(i)) / k_c(i) + k_d(i)); }); } -TEST(half, custom_kernel_explicit_implicit_host_device) +TEST(float16_t, custom_kernel_explicit_implicit_host_device) { - gt::half a_val{12.34}, b_val{2.345}, c_val{0.987}, d_val{0.67}, e_val{3.14}; - gt::half s1{0.1}, s2{4.56}; + gt::float16_t a_val{12.34}, b_val{2.345}, c_val{0.987}, d_val{0.67}, e_val{3.14}; + gt::float16_t s1{0.1}, s2{4.56}; - gt::half r = s2 - e_val * ((a_val - s1 * b_val) / c_val + d_val); + gt::float16_t r = s2 - e_val * ((a_val - s1 * b_val) / c_val + d_val); auto shape = gt::shape(3); - gt::gtensor h_a(shape, a_val); - gt::gtensor h_b(shape, b_val); - gt::gtensor h_c(shape, c_val); - gt::gtensor h_d(shape, d_val); - gt::gtensor h_e(shape, e_val); - gt::gtensor h_r_expl(shape); - gt::gtensor h_r_impl(shape); - - gt::gtensor d_a(shape, a_val); - gt::gtensor d_b(shape, b_val); - gt::gtensor d_c(shape, c_val); - gt::gtensor d_d(shape, d_val); - gt::gtensor d_e(shape, e_val); - gt::gtensor d_r_expl(shape); - gt::gtensor d_r_impl(shape); + gt::gtensor h_a(shape, a_val); + gt::gtensor h_b(shape, b_val); + gt::gtensor h_c(shape, c_val); + gt::gtensor h_d(shape, d_val); + gt::gtensor h_e(shape, e_val); + gt::gtensor h_r_expl(shape); + gt::gtensor h_r_impl(shape); + + gt::gtensor d_a(shape, a_val); + gt::gtensor d_b(shape, b_val); + gt::gtensor d_c(shape, c_val); + gt::gtensor d_d(shape, d_val); + gt::gtensor d_e(shape, e_val); + gt::gtensor d_r_expl(shape); + gt::gtensor d_r_impl(shape); h_r_impl = s2 - h_e * ((h_a - s1 * h_b) / h_c + h_d); d_r_impl = s2 - d_e * ((d_a - s1 * d_b) / d_c + d_d); @@ -220,35 +220,35 @@ TEST(half, custom_kernel_explicit_implicit_host_device) EXPECT_EQ(h_r_impl, d_r_impl); } -TEST(half, mixed_precision_scalar) +TEST(float16_t, mixed_precision_scalar) { - gt::half a_half{1.0}; + gt::float16_t a_16{1.0}; - gt::half b_half{2.0}; - float b_float{2.0}; - double b_double{2.0}; + gt::float16_t b_16{2.0}; + float b_32{2.0}; + double b_64{2.0}; - auto c_half = a_half + b_half; - auto c_float = a_half + b_float; - auto c_double = a_half + b_double; + auto c_16 = a_16 + b_16; + auto c_32 = a_16 + b_32; + auto c_64 = a_16 + b_64; - EXPECT_TRUE((std::is_same::value)); - EXPECT_TRUE((std::is_same::value)); - EXPECT_TRUE((std::is_same::value)); + EXPECT_TRUE((std::is_same::value)); + EXPECT_TRUE((std::is_same::value)); + EXPECT_TRUE((std::is_same::value)); - EXPECT_EQ(c_half, c_float); - EXPECT_EQ(c_half, 
c_double); + EXPECT_EQ(c_16, c_32); + EXPECT_EQ(c_16, c_64); } template void test_mixed_precision_helper() { auto shape = gt::shape(3); - gt::gtensor vh(shape, 4.0); + gt::gtensor vh(shape, 4.0); gt::gtensor vf(shape, 3.0); gt::gtensor vd(shape, 2.0); - gt::gtensor rh(shape); + gt::gtensor rh(shape); gt::gtensor rf(shape); gt::gtensor rd(shape); @@ -263,12 +263,12 @@ void test_mixed_precision_helper() EXPECT_EQ(ref, rd); } -TEST(half, mixed_precision_host) +TEST(float16_t, mixed_precision_host) { test_mixed_precision_helper(); } -TEST(half, mixed_precision_device) +TEST(float16_t, mixed_precision_device) { test_mixed_precision_helper(); } From f014cce09e50cad41271cff3cd141e5d076cfe14 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Thu, 7 Sep 2023 14:20:39 +0200 Subject: [PATCH 35/38] [rename] Filenames & includes: half --> float16_t --- include/gtensor/{half.h => float16_t.h} | 0 include/gtensor/gtensor.h | 2 +- tests/CMakeLists.txt | 2 +- tests/{test_half.cxx => test_float16_t.cxx} | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename include/gtensor/{half.h => float16_t.h} (100%) rename tests/{test_half.cxx => test_float16_t.cxx} (99%) diff --git a/include/gtensor/half.h b/include/gtensor/float16_t.h similarity index 100% rename from include/gtensor/half.h rename to include/gtensor/float16_t.h diff --git a/include/gtensor/gtensor.h b/include/gtensor/gtensor.h index 70dee7d3..feac235d 100644 --- a/include/gtensor/gtensor.h +++ b/include/gtensor/gtensor.h @@ -20,7 +20,7 @@ #include "space.h" #if defined(GTENSOR_ENABLE_FP16) -#include "half.h" +#include "float16_t.h" #endif namespace gt diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 41a4f10d..b68cdeee 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -82,5 +82,5 @@ if (GTENSOR_ENABLE_FFT) endif() if (GTENSOR_ENABLE_FP16) - add_gtensor_test(test_half) + add_gtensor_test(test_float16_t) endif() diff --git a/tests/test_half.cxx b/tests/test_float16_t.cxx similarity index 99% rename from tests/test_half.cxx rename to tests/test_float16_t.cxx index 4cc8f5b7..b8687595 100644 --- a/tests/test_half.cxx +++ b/tests/test_float16_t.cxx @@ -2,7 +2,7 @@ #include -#include +#include TEST(float16_t, scalar_arithmetic) { From 278d552b71462f07922ad1d75622fe0b45fdc77f Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Thu, 7 Sep 2023 14:40:14 +0200 Subject: [PATCH 36/38] [clean] Remove macros.h include --- include/gtensor/float16_t.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/gtensor/float16_t.h b/include/gtensor/float16_t.h index 5e21701f..26a4f367 100644 --- a/include/gtensor/float16_t.h +++ b/include/gtensor/float16_t.h @@ -11,8 +11,6 @@ #error "GTENSOR_ENABLE_FP16=ON, but no 16-bit FP type available!" #endif -#include - namespace gt { From f53673fa8257b767f05f7e3e317e6186c78a1e47 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Thu, 7 Sep 2023 14:50:10 +0200 Subject: [PATCH 37/38] [clean] Applied clang-format --- include/gtensor/float16_t.h | 83 +++++---- tests/test_float16_t.cxx | 361 ++++++++++++++++++------------------ 2 files changed, 234 insertions(+), 210 deletions(-) diff --git a/include/gtensor/float16_t.h b/include/gtensor/float16_t.h index 26a4f367..1b65f83d 100644 --- a/include/gtensor/float16_t.h +++ b/include/gtensor/float16_t.h @@ -23,9 +23,8 @@ using storage_type = __half; #error "GTENSOR_ENABLE_FP16=ON, but no 16-bit FP type available!" 
#endif -#if defined(GTENSOR_FP16_CUDA_HEADER) \ - && defined(__CUDA_ARCH__) \ - && (__CUDA_ARCH__ >= 530) +#if defined(GTENSOR_FP16_CUDA_HEADER) && defined(__CUDA_ARCH__) && \ + (__CUDA_ARCH__ >= 530) using compute_type = __half; #else using compute_type = float; @@ -34,27 +33,38 @@ using compute_type = float; class float16_t { public: - float16_t() = default; - GT_INLINE float16_t(float x) : x(x) {}; - GT_INLINE float16_t(storage_type x) : x(x) {}; + float16_t() = default; + GT_INLINE float16_t(float x) : x(x){}; + GT_INLINE float16_t(storage_type x) : x(x){}; + + GT_INLINE const float16_t& operator=(const float f) + { + x = f; + return *this; + } + GT_INLINE compute_type Get() const { return static_cast(x); } - GT_INLINE const float16_t& operator=(const float f) { x = f; return *this; } - GT_INLINE compute_type Get() const { return static_cast(x); } private: - storage_type x; + storage_type x; }; -#define PROVIDE_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(op) \ - GT_INLINE float16_t operator op(const float16_t& lhs, const float16_t& rhs) \ - { return float16_t( lhs.Get() op rhs.Get() ); } - -#define PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(op, fp_type) \ - \ - GT_INLINE fp_type operator op(const float16_t& lhs, const fp_type& rhs) \ - { return static_cast(lhs.Get()) op rhs; } \ - \ - GT_INLINE fp_type operator op(const fp_type& lhs, const float16_t& rhs) \ - { return lhs op static_cast(rhs.Get()); } +#define PROVIDE_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(op) \ + GT_INLINE float16_t operator op(const float16_t& lhs, const float16_t& rhs) \ + { \ + return float16_t(lhs.Get() op rhs.Get()); \ + } + +#define PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(op, fp_type) \ + \ + GT_INLINE fp_type operator op(const float16_t& lhs, const fp_type& rhs) \ + { \ + return static_cast(lhs.Get()) op rhs; \ + } \ + \ + GT_INLINE fp_type operator op(const fp_type& lhs, const float16_t& rhs) \ + { \ + return lhs op static_cast(rhs.Get()); \ + } PROVIDE_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(+); PROVIDE_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(-); @@ -71,17 +81,23 @@ PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(-, double); PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(*, double); PROVIDE_MIXED_FLOAT16T_BINARY_ARITHMETIC_OPERATOR(/, double); -#define PROVIDE_FLOAT16T_COMPARISON_OPERATOR(op) \ - GT_INLINE bool operator op(const float16_t& lhs, const float16_t& rhs) \ - { return lhs.Get() op rhs.Get(); } - -#define PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(op, fp_type) \ - \ - GT_INLINE bool operator op(const float16_t& lhs, const fp_type& rhs) \ - { return static_cast(lhs.Get()) op rhs; } \ - \ - GT_INLINE bool operator op(const fp_type& lhs, const float16_t& rhs) \ - { return lhs op static_cast(rhs.Get()); } +#define PROVIDE_FLOAT16T_COMPARISON_OPERATOR(op) \ + GT_INLINE bool operator op(const float16_t& lhs, const float16_t& rhs) \ + { \ + return lhs.Get() op rhs.Get(); \ + } + +#define PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(op, fp_type) \ + \ + GT_INLINE bool operator op(const float16_t& lhs, const fp_type& rhs) \ + { \ + return static_cast(lhs.Get()) op rhs; \ + } \ + \ + GT_INLINE bool operator op(const fp_type& lhs, const float16_t& rhs) \ + { \ + return lhs op static_cast(rhs.Get()); \ + } PROVIDE_FLOAT16T_COMPARISON_OPERATOR(==); PROVIDE_FLOAT16T_COMPARISON_OPERATOR(!=); @@ -105,7 +121,10 @@ PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(>, double); PROVIDE_MIXED_FLOAT16T_COMPARISON_OPERATOR(>=, double); std::ostream& operator<<(std::ostream& s, const float16_t& h) -{ s << static_cast(h.Get()); 
return s; } +{ + s << static_cast(h.Get()); + return s; +} } // namespace gt diff --git a/tests/test_float16_t.cxx b/tests/test_float16_t.cxx index b8687595..469b4725 100644 --- a/tests/test_float16_t.cxx +++ b/tests/test_float16_t.cxx @@ -6,269 +6,274 @@ TEST(float16_t, scalar_arithmetic) { - gt::float16_t a{1.0}; - gt::float16_t b{2.0}; + gt::float16_t a{1.0}; + gt::float16_t b{2.0}; - gt::float16_t c{0.0}; - gt::float16_t ref{0.0}; + gt::float16_t c{0.0}; + gt::float16_t ref{0.0}; - c = a + b; - ref = 3.0; - EXPECT_EQ(c, ref); + c = a + b; + ref = 3.0; + EXPECT_EQ(c, ref); - c = a - b; - ref = -1.0; - EXPECT_EQ(c, ref); + c = a - b; + ref = -1.0; + EXPECT_EQ(c, ref); - c = a * b; - ref = 2.0; - EXPECT_EQ(c, ref); + c = a * b; + ref = 2.0; + EXPECT_EQ(c, ref); - c = a / b; - ref = 0.5; - EXPECT_EQ(c, ref); + c = a / b; + ref = 0.5; + EXPECT_EQ(c, ref); } TEST(float16_t, binary_comparison_operators) { - gt::float16_t a{1.0}; - gt::float16_t b{2.0}; - gt::float16_t c{2.0}; - - EXPECT_EQ(a, a); - EXPECT_EQ(b, b); - EXPECT_EQ(b, c); - EXPECT_EQ(c, b); - EXPECT_EQ(c, c); - - EXPECT_NE(a, b); - EXPECT_NE(a, c); - EXPECT_NE(b, a); - EXPECT_NE(c, a); - - EXPECT_LT(a, b); - EXPECT_LT(a, c); - - EXPECT_LE(a, a); - EXPECT_LE(a, b); - EXPECT_LE(a, c); - EXPECT_LE(b, b); - EXPECT_LE(b, c); - EXPECT_LE(c, b); - EXPECT_LE(c, c); - - EXPECT_GT(b, a); - EXPECT_GT(c, a); - - EXPECT_GE(a, a); - EXPECT_GE(b, a); - EXPECT_GE(b, b); - EXPECT_GE(b, c); - EXPECT_GE(c, a); - EXPECT_GE(c, b); - EXPECT_GE(c, c); + gt::float16_t a{1.0}; + gt::float16_t b{2.0}; + gt::float16_t c{2.0}; + + EXPECT_EQ(a, a); + EXPECT_EQ(b, b); + EXPECT_EQ(b, c); + EXPECT_EQ(c, b); + EXPECT_EQ(c, c); + + EXPECT_NE(a, b); + EXPECT_NE(a, c); + EXPECT_NE(b, a); + EXPECT_NE(c, a); + + EXPECT_LT(a, b); + EXPECT_LT(a, c); + + EXPECT_LE(a, a); + EXPECT_LE(a, b); + EXPECT_LE(a, c); + EXPECT_LE(b, b); + EXPECT_LE(b, c); + EXPECT_LE(c, b); + EXPECT_LE(c, c); + + EXPECT_GT(b, a); + EXPECT_GT(c, a); + + EXPECT_GE(a, a); + EXPECT_GE(b, a); + EXPECT_GE(b, b); + EXPECT_GE(b, c); + EXPECT_GE(c, a); + EXPECT_GE(c, b); + EXPECT_GE(c, c); } template -void generic_fill_1D(gt::gtensor& x, const gt::float16_t& fill_value) +void generic_fill_1D(gt::gtensor& x, + const gt::float16_t& fill_value) { - auto k_x = x.to_kernel(); + auto k_x = x.to_kernel(); - gt::launch<1, S>(x.shape(), GT_LAMBDA(int i) { k_x(i) = fill_value; }); + gt::launch<1, S>( + x.shape(), GT_LAMBDA(int i) { k_x(i) = fill_value; }); } TEST(float16_t, auto_init_host) { - gt::float16_t fill_value{1.25}; - gt::gtensor a(gt::shape(5), fill_value); - gt::gtensor b(a.shape()); + gt::float16_t fill_value{1.25}; + gt::gtensor a(gt::shape(5), fill_value); + gt::gtensor b(a.shape()); - generic_fill_1D(b, fill_value); + generic_fill_1D(b, fill_value); - EXPECT_EQ(a, b); + EXPECT_EQ(a, b); } TEST(float16_t, auto_init_device) { - gt::float16_t fill_value{1.25}; - gt::gtensor a(gt::shape(5), fill_value); - gt::gtensor b(a.shape()); + gt::float16_t fill_value{1.25}; + gt::gtensor a(gt::shape(5), fill_value); + gt::gtensor b(a.shape()); - generic_fill_1D(b, fill_value); + generic_fill_1D(b, fill_value); - EXPECT_EQ(a, b); + EXPECT_EQ(a, b); } template -void generic_explicit_haxpy_1D( const gt::float16_t& a, - const gt::gtensor& x, - gt::gtensor& y) +void generic_explicit_haxpy_1D(const gt::float16_t& a, + const gt::gtensor& x, + gt::gtensor& y) { - auto k_x = x.to_kernel(); - auto k_y = y.to_kernel(); + auto k_x = x.to_kernel(); + auto k_y = y.to_kernel(); - gt::launch<1, S>( - y.shape(), GT_LAMBDA(int i) { 
k_y(i) = k_y(i) + a * k_x(i); }); + gt::launch<1, S>( + y.shape(), GT_LAMBDA(int i) { k_y(i) = k_y(i) + a * k_x(i); }); } TEST(float16_t, haxpy_explicit_1D_host) { - gt::gtensor x(gt::shape(3), 1.5); - gt::gtensor y(x.shape(), 2.5); - gt::float16_t a{0.5}; - gt::gtensor ref(x.shape(), 3.25); + gt::gtensor x(gt::shape(3), 1.5); + gt::gtensor y(x.shape(), 2.5); + gt::float16_t a{0.5}; + gt::gtensor ref(x.shape(), 3.25); - generic_explicit_haxpy_1D(a, x, y); + generic_explicit_haxpy_1D(a, x, y); - EXPECT_EQ(y, ref); + EXPECT_EQ(y, ref); } TEST(float16_t, haxpy_explicit_1D_device) { - gt::gtensor x(gt::shape(3), 1.5); - gt::gtensor y(x.shape(), 2.5); - gt::float16_t a{0.5}; - gt::gtensor ref(y.shape(), 3.25); + gt::gtensor x(gt::shape(3), 1.5); + gt::gtensor y(x.shape(), 2.5); + gt::float16_t a{0.5}; + gt::gtensor ref(y.shape(), 3.25); - generic_explicit_haxpy_1D(a, x, y); + generic_explicit_haxpy_1D(a, x, y); - EXPECT_EQ(y, ref); + EXPECT_EQ(y, ref); } TEST(float16_t, haxpy_implicit_1D_host) { - gt::gtensor x(gt::shape(3), 1.5); - gt::gtensor y(x.shape(), 2.5); - gt::float16_t a{0.5}; - gt::gtensor ref(x.shape(), 3.25); + gt::gtensor x(gt::shape(3), 1.5); + gt::gtensor y(x.shape(), 2.5); + gt::float16_t a{0.5}; + gt::gtensor ref(x.shape(), 3.25); - y = a * x + y; + y = a * x + y; - EXPECT_EQ(y, ref); + EXPECT_EQ(y, ref); } TEST(float16_t, haxpy_implicit_1D_device) { - gt::gtensor x(gt::shape(3), 1.5); - gt::gtensor y(x.shape(), 2.5); - gt::float16_t a{0.5}; - gt::gtensor ref(y.shape(), 3.25); + gt::gtensor x(gt::shape(3), 1.5); + gt::gtensor y(x.shape(), 2.5); + gt::float16_t a{0.5}; + gt::gtensor ref(y.shape(), 3.25); - y = a * x + y; + y = a * x + y; - EXPECT_EQ(y, ref); + EXPECT_EQ(y, ref); } template -void generic_explicit_custom_kernel_1D( const gt::float16_t& s1, - const gt::float16_t& s2, - const gt::gtensor& a, - const gt::gtensor& b, - const gt::gtensor& c, - const gt::gtensor& d, - const gt::gtensor& e, - gt::gtensor& result) +void generic_explicit_custom_kernel_1D( + const gt::float16_t& s1, const gt::float16_t& s2, + const gt::gtensor& a, + const gt::gtensor& b, + const gt::gtensor& c, + const gt::gtensor& d, + const gt::gtensor& e, + gt::gtensor& result) { - auto k_a = a.to_kernel(); - auto k_b = b.to_kernel(); - auto k_c = c.to_kernel(); - auto k_d = d.to_kernel(); - auto k_e = e.to_kernel(); - auto k_r = result.to_kernel(); - - gt::launch<1, S>(result.shape(), GT_LAMBDA(int i) - { k_r(i) = s2 - k_e(i) * ((k_a(i) - s1 * k_b(i)) / k_c(i) + k_d(i)); }); + auto k_a = a.to_kernel(); + auto k_b = b.to_kernel(); + auto k_c = c.to_kernel(); + auto k_d = d.to_kernel(); + auto k_e = e.to_kernel(); + auto k_r = result.to_kernel(); + + gt::launch<1, S>( + result.shape(), GT_LAMBDA(int i) { + k_r(i) = s2 - k_e(i) * ((k_a(i) - s1 * k_b(i)) / k_c(i) + k_d(i)); + }); } TEST(float16_t, custom_kernel_explicit_implicit_host_device) { - gt::float16_t a_val{12.34}, b_val{2.345}, c_val{0.987}, d_val{0.67}, e_val{3.14}; - gt::float16_t s1{0.1}, s2{4.56}; - - gt::float16_t r = s2 - e_val * ((a_val - s1 * b_val) / c_val + d_val); - - auto shape = gt::shape(3); - - gt::gtensor h_a(shape, a_val); - gt::gtensor h_b(shape, b_val); - gt::gtensor h_c(shape, c_val); - gt::gtensor h_d(shape, d_val); - gt::gtensor h_e(shape, e_val); - gt::gtensor h_r_expl(shape); - gt::gtensor h_r_impl(shape); - - gt::gtensor d_a(shape, a_val); - gt::gtensor d_b(shape, b_val); - gt::gtensor d_c(shape, c_val); - gt::gtensor d_d(shape, d_val); - gt::gtensor d_e(shape, e_val); - gt::gtensor d_r_expl(shape); - gt::gtensor 
d_r_impl(shape); - - h_r_impl = s2 - h_e * ((h_a - s1 * h_b) / h_c + h_d); - d_r_impl = s2 - d_e * ((d_a - s1 * d_b) / d_c + d_d); - - generic_explicit_custom_kernel_1D(s1, s2, - h_a, h_b, h_c, h_d, h_e, h_r_expl); - - generic_explicit_custom_kernel_1D(s1, s2, - d_a, d_b, d_c, d_d, d_e, d_r_expl); - - EXPECT_EQ(h_r_impl(2), r); - EXPECT_EQ(h_r_impl, h_r_expl); - EXPECT_EQ(h_r_impl, d_r_expl); - EXPECT_EQ(h_r_impl, d_r_impl); + gt::float16_t a_val{12.34}, b_val{2.345}, c_val{0.987}, d_val{0.67}, + e_val{3.14}; + gt::float16_t s1{0.1}, s2{4.56}; + + gt::float16_t r = s2 - e_val * ((a_val - s1 * b_val) / c_val + d_val); + + auto shape = gt::shape(3); + + gt::gtensor h_a(shape, a_val); + gt::gtensor h_b(shape, b_val); + gt::gtensor h_c(shape, c_val); + gt::gtensor h_d(shape, d_val); + gt::gtensor h_e(shape, e_val); + gt::gtensor h_r_expl(shape); + gt::gtensor h_r_impl(shape); + + gt::gtensor d_a(shape, a_val); + gt::gtensor d_b(shape, b_val); + gt::gtensor d_c(shape, c_val); + gt::gtensor d_d(shape, d_val); + gt::gtensor d_e(shape, e_val); + gt::gtensor d_r_expl(shape); + gt::gtensor d_r_impl(shape); + + h_r_impl = s2 - h_e * ((h_a - s1 * h_b) / h_c + h_d); + d_r_impl = s2 - d_e * ((d_a - s1 * d_b) / d_c + d_d); + + generic_explicit_custom_kernel_1D(s1, s2, h_a, h_b, h_c, h_d, + h_e, h_r_expl); + + generic_explicit_custom_kernel_1D(s1, s2, d_a, d_b, d_c, + d_d, d_e, d_r_expl); + + EXPECT_EQ(h_r_impl(2), r); + EXPECT_EQ(h_r_impl, h_r_expl); + EXPECT_EQ(h_r_impl, d_r_expl); + EXPECT_EQ(h_r_impl, d_r_impl); } TEST(float16_t, mixed_precision_scalar) { - gt::float16_t a_16{1.0}; + gt::float16_t a_16{1.0}; - gt::float16_t b_16{2.0}; - float b_32{2.0}; - double b_64{2.0}; + gt::float16_t b_16{2.0}; + float b_32{2.0}; + double b_64{2.0}; - auto c_16 = a_16 + b_16; - auto c_32 = a_16 + b_32; - auto c_64 = a_16 + b_64; + auto c_16 = a_16 + b_16; + auto c_32 = a_16 + b_32; + auto c_64 = a_16 + b_64; - EXPECT_TRUE((std::is_same::value)); - EXPECT_TRUE((std::is_same::value)); - EXPECT_TRUE((std::is_same::value)); + EXPECT_TRUE((std::is_same::value)); + EXPECT_TRUE((std::is_same::value)); + EXPECT_TRUE((std::is_same::value)); - EXPECT_EQ(c_16, c_32); - EXPECT_EQ(c_16, c_64); + EXPECT_EQ(c_16, c_32); + EXPECT_EQ(c_16, c_64); } -template +template void test_mixed_precision_helper() { - auto shape = gt::shape(3); - gt::gtensor vh(shape, 4.0); - gt::gtensor vf(shape, 3.0); - gt::gtensor vd(shape, 2.0); + auto shape = gt::shape(3); + gt::gtensor vh(shape, 4.0); + gt::gtensor vf(shape, 3.0); + gt::gtensor vd(shape, 2.0); - gt::gtensor rh(shape); - gt::gtensor rf(shape); - gt::gtensor rd(shape); + gt::gtensor rh(shape); + gt::gtensor rf(shape); + gt::gtensor rd(shape); - gt::gtensor ref(shape, 10.0); + gt::gtensor ref(shape, 10.0); - rh = (vh * vf) - (vh / vd); - rf = (vh * vf) - (vh / vd); - rd = (vh * vf) - (vh / vd); + rh = (vh * vf) - (vh / vd); + rf = (vh * vf) - (vh / vd); + rd = (vh * vf) - (vh / vd); - EXPECT_EQ(ref, rh); - EXPECT_EQ(ref, rf); - EXPECT_EQ(ref, rd); + EXPECT_EQ(ref, rh); + EXPECT_EQ(ref, rf); + EXPECT_EQ(ref, rd); } TEST(float16_t, mixed_precision_host) { - test_mixed_precision_helper(); + test_mixed_precision_helper(); } TEST(float16_t, mixed_precision_device) { - test_mixed_precision_helper(); + test_mixed_precision_helper(); } From 88f100d2ec14694439a3fdc32da1d94ab0c186c7 Mon Sep 17 00:00:00 2001 From: cmpfeil Date: Thu, 7 Sep 2023 15:45:59 +0200 Subject: [PATCH 38/38] [rename] Missed some gt::half --> gt::float16_t --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ca21e8cb..fe17a217 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,7 +51,7 @@ option(GTENSOR_ALLOCATOR_CACHING "Enable naive caching allocators" ON) option(GTENSOR_BOUNDS_CHECK "Enable per access bounds checking" OFF) option(GTENSOR_ADDRESS_CHECK "Enable address checking for device spans" OFF) option(GTENSOR_SYNC_KERNELS "Enable host sync after assign and launch kernels" OFF) -option(GTENSOR_ENABLE_FP16 "Enable 16-bit floating point type gt::half" OFF) +option(GTENSOR_ENABLE_FP16 "Enable 16-bit floating point type gt::float16_t" OFF) if (GTENSOR_ENABLE_FORTRAN) # do this early (here) since later the `enable_language(Fortran)` gives me trouble @@ -337,7 +337,7 @@ else() endif() if (GTENSOR_ENABLE_FP16) - message(STATUS "${PROJECT_NAME}: gt::half is ENABLED") + message(STATUS "${PROJECT_NAME}: gt::float16_t is ENABLED") target_compile_definitions(gtensor_${GTENSOR_DEVICE} INTERFACE GTENSOR_ENABLE_FP16) endif()
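
For reference, below is a minimal usage sketch of the gt::float16_t interface this series converges on, written against the behaviour exercised in tests/test_float16_t.cxx. It is illustrative only and not part of the patch series: the include path, the gtensor template parameters (element type, rank, memory space), and the main() scaffolding are assumptions, since only the behaviour — 16-bit storage with float fallback for computation, implicit expression evaluation, and mixed-precision promotion — is established by the patches above. A build configured with -DGTENSOR_ENABLE_FP16=ON (the CMake option added in patch 33) is assumed.

// Hypothetical example, not part of the series: exercising gt::float16_t
// on the host. Include path and template arguments are assumed.
#include <iostream>

#include <gtensor/gtensor.h>

int main()
{
  // Scalar arithmetic: values are stored in 16 bits; on hosts without
  // native half support the operators compute in float (compute_type).
  gt::float16_t a{1.5};
  gt::float16_t b{2.5};
  gt::float16_t c = a * b + a; // 1.5 * 2.5 + 1.5 = 5.25

  // 1-D containers, analogous to the haxpy_implicit_1D_host test.
  auto shape = gt::shape(3);
  gt::gtensor<gt::float16_t, 1, gt::space::host> x(shape, 1.5);
  gt::gtensor<gt::float16_t, 1, gt::space::host> y(shape, 2.5);
  y = a * x + y; // each element becomes 1.5 * 1.5 + 2.5 = 4.75

  // Mixed-precision scalar operations promote to the wider type,
  // as provided by the PROVIDE_MIXED_FLOAT16T_* operators.
  double d = 2.0;
  auto mixed = a + d; // deduced as double

  std::cout << "c = " << c << ", y(0) = " << y(0)
            << ", mixed = " << mixed << "\n";
  return 0;
}

On a CUDA build targeting __CUDA_ARCH__ >= 530 the same code computes in native __half; on all other targets the operators fall back to float, as selected by the compute_type alias introduced earlier in the series.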