Skip to content

Commit

Permalink
WIP
Browse files: browse the repository at this point in the history
  • Loading branch information
serge-sans-paille committed Sep 17, 2024
1 parent fa526c1 commit dd15471
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
10 changes: 8 additions & 2 deletions include/xsimd/arch/generic/xsimd_generic_logical.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ namespace xsimd

// count
template <class A, class T>
XSIMD_INLINE size_t count(batch_bool<T, A> const& x, requires_arch<generic>) noexcept
XSIMD_INLINE size_t count(batch_bool<T, A> const& self, requires_arch<generic>) noexcept
{
uint64_t m = x.mask();
uint64_t m = self.mask();
XSIMD_IF_CONSTEXPR(batch_bool<T, A>::size < 14)
{
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64
Expand All @@ -41,9 +41,14 @@ namespace xsimd
#define builtin_popcount(v) __builtin_popcount(v)
#endif
#endif

#ifdef builtin_popcount
return builtin_popcount(m);
#else
alignas(A::alignment()) bool buffer[batch_bool<T, A>::size];
self.store_aligned(buffer);
return std::count(std::begin(buffer), std::end(buffer), true);
#if 0
// FIXME: we could do better by dispatching to the appropriate
// popcount instruction depending on the arch...
XSIMD_IF_CONSTEXPR(batch_bool<T, A>::size < 32)
Expand All @@ -61,6 +66,7 @@ namespace xsimd
m = (m + (m >> 4)) & (uint64_t) ~(uint64_t)0 / 255 * 15; // temp
return (m * ((uint64_t) ~(uint64_t)0 / 255)) >> (sizeof(uint64_t) - 1) * CHAR_BIT; // count
}
#endif
#endif
}
}
Expand Down
2 changes: 2 additions & 0 deletions include/xsimd/arch/xsimd_emulated.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ namespace xsimd
return r;
}

#if 0
// count
template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
XSIMD_INLINE size_t count(batch_bool<T, A> const& x, requires_arch<emulated<N>>) noexcept
Expand All @@ -241,6 +242,7 @@ namespace xsimd
m = (m + (m >> 4)) & (uint64_t) ~(uint64_t)0 / 255 * 15; // temp
return (m * ((uint64_t) ~(uint64_t)0 / 255)) >> (sizeof(uint64_t) - 1) * CHAR_BIT; // count
}
#endif

// store_complex
namespace detail
Expand Down

0 comments on commit dd15471

Please sign in to comment.