Skip to content

Commit

Permalink
avoid allocating win
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed Aug 5, 2024
1 parent 9747c6e commit 5b2c918
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 25 deletions.
2 changes: 1 addition & 1 deletion include/mcl/ec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -984,7 +984,7 @@ inline size_t argminForMulVec(size_t n)

/*
Extract w bits from yVec[i] starting at the pos-th bit, assign this value to v.
tbl[v] += xVec[i]
tbl[v-1] += xVec[i]
win = xVec[0] + 2 xVec[1] + 3 xVec[2] + ... + tblN xVec[tblN-1]
*/
template<class G>
Expand Down
52 changes: 28 additions & 24 deletions src/msm_avx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,29 @@ inline void reduceSum(mcl::msm::G1A& Q, const G& P)
}
}

template<class G, class V>
void mulVecUpdateTable(G& win, G *tbl, size_t tblN, const G *xVec, const V *yVec, size_t yn, size_t pos, size_t n)
{
const Vec m = vpbroadcastq(tblN-1);
for (size_t i = 0; i < tblN; i++) {
tbl[i].clear();
}
for (size_t i = 0; i < n; i++) {
V v = getUnitAt(yVec+i*yn, yn, pos);
v = vpandq(v, m);
G T;
T.gather(tbl, v);
G::add(T, T, xVec[i]);
T.scatter(tbl, v);
}
G sum = tbl[tblN - 1];
win = sum;
for (size_t i = 1; i < tblN - 1; i++) {
G::add(sum, sum, tbl[tblN - 1- i]);
G::add(win, win, sum);
}
}

// xVec[n], yVec[n * maxBitSize/64]
template<class G=EcM, class V=Vec>
inline void mulVecAVX512_inner(mcl::msm::G1A& P, const G *xVec, const V *yVec, size_t n, size_t maxBitSize)
Expand All @@ -1005,37 +1028,18 @@ inline void mulVecAVX512_inner(mcl::msm::G1A& P, const G *xVec, const V *yVec, s
G *tbl = (G*)Xbyak::AlignedMalloc(sizeof(G) * tblN, 64);
const size_t yn = maxBitSize / 64;
const size_t winN = (maxBitSize + c-1) / c;
G *win = (G*)Xbyak::AlignedMalloc(sizeof(G) * winN, 64);

const Vec m = vpbroadcastq(tblN-1);
for (size_t w = 0; w < winN; w++) {
for (size_t i = 0; i < tblN; i++) {
tbl[i].clear();
}
for (size_t i = 0; i < n; i++) {
V v = getUnitAt(yVec+i*yn, yn, c*w);
v = vpandq(v, m);
G T;
T.gather(tbl, v);
G::add(T, T, xVec[i]);
T.scatter(tbl, v);
}
G sum = tbl[tblN - 1];
win[w] = sum;
for (size_t i = 1; i < tblN - 1; i++) {
G::add(sum, sum, tbl[tblN - 1- i]);
G::add(win[w], win[w], sum);
}
}
G T = win[winN - 1];
G T;
mulVecUpdateTable<G, V>(T, tbl, tblN, xVec, yVec, yn, c*(winN-1), n);
for (size_t w = 1; w < winN; w++) {
for (size_t i = 0; i < c; i++) {
G::dbl(T, T);
}
G::add(T, T, win[winN - 1- w]);
G win;
mulVecUpdateTable<G, V>(win, tbl, tblN, xVec, yVec, yn, c*(winN-1-w), n);
G::add(T, T, win);
}
reduceSum(P, T);
Xbyak::AlignedFree(win);
Xbyak::AlignedFree(tbl);
}

Expand Down

0 comments on commit 5b2c918

Please sign in to comment.