Skip to content

Commit

Permalink
ff/baby_bear.hpp: inline bb31_4_t operator* in non-device-c builds.
Browse files Browse the repository at this point in the history
One can make a case for introducing a dedicated macro to control the
behaviour, as opposed to relying on a common compiler flag.
  • Loading branch information
dot-asm committed Jul 7, 2024
1 parent 2606ece commit 5d30a87
Showing 1 changed file with 20 additions and 1 deletion.
21 changes: 20 additions & 1 deletion ff/baby_bear.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class __align__(16) bb31_4_t {
# define asm asm volatile
# endif
// +20% in comparison to multiplication by itself even though
// the amount of instructions is the same...
// the amount of instructions is the same...
// ret[0] = a[0]*a[0] + BETA*(2*a[1]*a[3] + a[2]*a[2]);
asm("{ .reg.b32 %lo, %hi, %m; .reg.pred %p;\n\t"
"mul.lo.u32 %lo, %4, %2; mul.hi.u32 %hi, %4, %2;\n\t"
Expand Down Expand Up @@ -385,6 +385,7 @@ class __align__(16) bb31_4_t {
}

public:
# ifdef __CUDACC_RDC__
// Product of two bb31_4_t values, obtained by calling mul() on the
// by-value copy |a| with |b| as the argument. This overload sits in the
// __CUDACC_RDC__ branch and carries __noinline__, i.e. the compiler is
// asked to keep it as an out-of-line device function.
// NOTE(review): presumably mul() updates its object in place - confirm
// against its definition.
friend __device__ __noinline__ bb31_4_t operator*(bb31_4_t a, bb31_4_t b)
{ return a.mul(b); }
inline bb31_4_t& operator*=(const bb31_4_t& b)
Expand All @@ -403,6 +404,24 @@ class __align__(16) bb31_4_t {
{ return a * b; }
// Compound multiplication by a bb31_t value in the __CUDACC_RDC__ branch:
// evaluates *this * b through the matching operator* overload and assigns
// the product back to *this, returning the result of that assignment.
inline bb31_4_t& operator*=(bb31_t b)
{ return *this = *this * b; }
# else
// Product of two bb31_4_t values for builds where __CUDACC_RDC__ is not
// defined; declared inline rather than __noinline__. |a| is received by
// value, so mul() is invoked on a local copy, while |b| is passed on
// as a const reference.
// NOTE(review): presumably mul() updates its object in place - confirm
// against its definition.
friend inline bb31_4_t operator*(bb31_4_t a, const bb31_4_t& b)
{ return a.mul(b); }
// Compound multiplication by another bb31_4_t: forwards |b| to mul() on
// this object and returns the reference mul() yields.
// NOTE(review): presumably mul() modifies *this and returns it - confirm
// against its definition.
inline bb31_4_t& operator*=(const bb31_4_t& b)
{ return mul(b); }

// Compound multiplication by a bb31_t value: applies `*= b` to c[0]
// through c[3] in ascending order, then returns this object by reference
// so the call can be chained.
inline bb31_4_t& operator*=(bb31_t b)
{
    size_t idx = 0;
    while (idx < 4) {
        c[idx] *= b;
        ++idx;
    }

    return *this;
}
// bb31_4_t times bb31_t: scales the by-value copy |a| through
// operator*=(bb31_t) and returns a copy of the scaled value.
friend inline bb31_4_t operator*(bb31_4_t a, bb31_t b)
{
    bb31_4_t& scaled = (a *= b);
    return scaled;
}
// bb31_t times bb31_4_t (operands in the opposite order): scales the
// by-value copy |a| through operator*=(bb31_t) and returns a copy of the
// scaled value.
friend inline bb31_4_t operator*(bb31_t b, bb31_4_t a)
{
    bb31_4_t& scaled = (a *= b);
    return scaled;
}
# endif

friend inline bb31_4_t operator+(const bb31_4_t& a, const bb31_4_t& b)
{
Expand Down

0 comments on commit 5d30a87

Please sign in to comment.