Skip to content

Commit

Permalink
Native ARM64 popcount for sizes above 1 (#103214)
Browse files Browse the repository at this point in the history
  • Loading branch information
neon-sunset committed Jun 12, 2024
1 parent bb2a99b commit f5ab8d0
Showing 1 changed file with 24 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,40 @@ public static void PopCount<T>(ReadOnlySpan<T> x, Span<T> destination)
// This relies on 64-bit shifts for sizeof(T) == 8, and such shifts aren't accelerated on today's hardware.
// Alternative approaches, such as doing two 32-bit operations and combining them were observed to not
// provide any meaningful speedup over scalar. So for now, we don't vectorize when sizeof(T) == 8 unless AdvSimd is supported.
sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4;
(sizeof(T) is 1 or 2 or 4) || (AdvSimd.IsSupported && sizeof(T) == 8);

/// <summary>Scalar path: returns the population count of <paramref name="x"/> via <c>T.PopCount</c>.</summary>
/// <param name="x">The value whose population count is requested.</param>
/// <returns>The result of <c>T.PopCount(x)</c>.</returns>
public static T Invoke(T x)
{
    return T.PopCount(x);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<T> Invoke(Vector128<T> x)
{
if (sizeof(T) == 1)
if (AdvSimd.IsSupported)
{
if (AdvSimd.IsSupported)
Vector128<byte> cnt = AdvSimd.PopCount(x.AsByte());

if (sizeof(T) == 1)
{
return cnt.As<byte, T>();
}

if (sizeof(T) == 2)
{
return AdvSimd.AddPairwiseWidening(cnt).As<ushort, T>();
}

if (sizeof(T) == 4)
{
return AdvSimd.PopCount(x.AsByte()).As<byte, T>();
return AdvSimd.AddPairwiseWidening(AdvSimd.AddPairwiseWidening(cnt)).As<uint, T>();
}

if (sizeof(T) == 8)
{
return AdvSimd.AddPairwiseWidening(AdvSimd.AddPairwiseWidening(AdvSimd.AddPairwiseWidening(cnt))).As<ulong, T>();
}
}

if (sizeof(T) == 1)
{
if (PackedSimd.IsSupported)
{
return PackedSimd.PopCount(x.AsByte()).As<byte, T>();
Expand Down

0 comments on commit f5ab8d0

Please sign in to comment.