Skip to content

Commit

Permalink
ntt/kernels.cu: manually unroll the inner loop in LDE_spread_distribute_powers.
Browse files Browse the repository at this point in the history
  • Loading branch information
dot-asm committed Jun 19, 2024
1 parent b739cdd commit 41f8f17
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions ntt/kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -224,15 +224,18 @@ void LDE_spread_distribute_powers(fr_t* out, fr_t* in,
else
__syncthreads();


for (uint32_t i = 0; i < blowup; i++) {
uint32_t offset = i * blockDim.x + threadIdx.x;

for (uint32_t offset = threadIdx.x, i = 0; i < blowup; i += 2) {
r.zero();
if ((offset & (blowup-1)) == 0)
r = exchange[offset >> lg_blowup];
out[(idx0 << lg_blowup) + offset] = r;
offset += blockDim.x;

r.zero();
if ((offset & (blowup-1)) == 0)
r = exchange[offset >> lg_blowup];
out[(idx0 << lg_blowup) + offset] = r;
offset += blockDim.x;
}

idx0 += stride;
Expand Down

0 comments on commit 41f8f17

Please sign in to comment.