Skip to content

Commit

Permalink
added comments on VSX code path
Browse files Browse the repository at this point in the history
which is still disabled for now.
  • Loading branch information
Cyan4973 committed Aug 15, 2019
1 parent eccd082 commit dfd1bf1
Showing 1 changed file with 15 additions and 8 deletions.
23 changes: 15 additions & 8 deletions xxh3.h
Original file line number Diff line number Diff line change
Expand Up @@ -558,8 +558,9 @@ XXH3_accumulate_512( void* XXH_RESTRICT acc,
}
}

#elif (XXH_VECTOR == XXH_VSX) && 0 /* <=========================== MUST BE UPDATED */
/* note : vsx code path currently not tested in CI (limitation of cross-compiler and/or emulator) */
#elif (XXH_VECTOR == XXH_VSX) && 0 /* <=========================== DISABLED : MUST BE VALIDATED */
/* note : vsx code path currently not tested in CI (limitation of cross-compiler and/or emulator)
* for vsx code path to be shipped and supported, it is critical to create a CI test for it */
U64x2* const xacc = (U64x2*) acc; /* presumed aligned */
U64x2 const* const xdata = (U64x2 const*) data; /* no alignment restriction */
U64x2 const* const xkey = (U64x2 const*) key; /* no alignment restriction */
Expand All @@ -571,20 +572,26 @@ XXH3_accumulate_512( void* XXH_RESTRICT acc,
/* key_vec = xkey[i]; */
#ifdef __BIG_ENDIAN__
/* byteswap */
U64x2 const data_vec = vec_revb(vec_vsx_ld(0, xdata + i));
U64x2 const key_vec = vec_revb(vec_vsx_ld(0, xkey + i));
U64x2 const data_vec = vec_revb(vec_vsx_ld(0, xdata + i)); /* note : vec_revb is power9+ */
U64x2 const key_vec = vec_revb(vec_vsx_ld(0, xkey + i)); /* note : vec_revb is power9+ */
#else
U64x2 const data_vec = vec_vsx_ld(0, xdata + i);
U64x2 const key_vec = vec_vsx_ld(0, xkey + i);
#endif
U64x2 data_key = data_vec ^ key_vec;
U64x2 const data_key = data_vec ^ key_vec;
/* shuffled = (data_key << 32) | (data_key >> 32); */
U32x4 shuffled = (U32x4)vec_rl(data_key, v32);
U32x4 const shuffled = (U32x4)vec_rl(data_key, v32);
/* product = ((U64x2)data_key & 0xFFFFFFFF) * ((U64x2)shuffled & 0xFFFFFFFF); */
U64x2 product = XXH_vsxMultOdd((U32x4)data_key, shuffled);
U64x2 const product = XXH_vsxMultOdd((U32x4)data_key, shuffled);

xacc[i] += product;
xacc[i] += data_vec; /* <======================= Incorrect for 128-bit variant (must swap) */

if (accWidth == XXH3_acc_64bits) {
xacc[i] += data_vec;
} else { /* XXH3_acc_128bits */
U64x2 const data_swapped = vec_permi(data_vec, data_vec, 2); /* <===== untested !!! */
xacc[i] += data_swapped;
}
}

#else /* scalar variant of Accumulator - universal */
Expand Down

0 comments on commit dfd1bf1

Please sign in to comment.