Skip to content

Commit

Permalink
Improve AVX512 code
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch committed Jul 10, 2024
1 parent 2f51cf0 commit 20a77db
Showing 1 changed file with 7 additions and 8 deletions.
15 changes: 7 additions & 8 deletions include/Sieve.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,28 +203,27 @@ class Sieve
else
{
uint64_t i = start_idx + 1;
uint64_t cnt = popcnt64(sieve64[start_idx] & m1);
__m512i vcnt = _mm512_setzero_si512();
uint64_t start_bits = sieve64[start_idx] & m1;
uint64_t stop_bits = sieve64[stop_idx] & m2;
__m512i vec = _mm512_set_epi64(0, 0, 0, 0, 0, 0, stop_bits, start_bits);
__m512i vcnt = _mm512_popcnt_epi64(vec);

// The AVX512 main loop and the masked tail load below together compute this scalar loop:
// for (i = start_idx + 1; i < stop_idx; i++)
// cnt += popcnt64(sieve64[i]);
//
for (; i + 8 < stop_idx; i += 8)
{
__m512i vec = _mm512_loadu_epi64(&sieve64[i]);
vec = _mm512_loadu_epi64(&sieve64[i]);
vec = _mm512_popcnt_epi64(vec);
vcnt = _mm512_add_epi64(vcnt, vec);
}

__mmask8 mask = (__mmask8) _bzhi_u64(0xff, stop_idx - i);
__m512i vec = _mm512_maskz_loadu_epi64(mask, &sieve64[i]);
vec = _mm512_maskz_loadu_epi64(mask, &sieve64[i]);
vec = _mm512_popcnt_epi64(vec);
vcnt = _mm512_add_epi64(vcnt, vec);

cnt += _mm512_reduce_add_epi64(vcnt);
cnt += popcnt64(sieve64[stop_idx] & m2);
return cnt;
return _mm512_reduce_add_epi64(vcnt);
}
}

Expand Down

0 comments on commit 20a77db

Please sign in to comment.