Skip to content

Commit

Permalink
Use pointers to pass in s32fc arguments
Browse files Browse the repository at this point in the history
This avoids undefined behaviour arising from incompatibility between
complex numbers in C and C++.

Signed-off-by: Clayton Smith <argilo@gmail.com>
  • Loading branch information
argilo committed Nov 7, 2023
1 parent 56a893c commit 7e20f80
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 131 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ jobs:
submodules: 'recursive'
- uses: uraimo/run-on-arch-action@v2.5.0
name: Build in non-x86 container
continue-on-error: ${{ contains(fromJson('["ppc64le", "s390x"]'), matrix.arch) }}
id: build
with:
arch: ${{ matrix.arch }}
Expand Down Expand Up @@ -153,7 +152,9 @@ jobs:
cmake -DCMAKE_CXX_FLAGS="-Werror" -DBUILD_EXECUTABLE=ON ..
echo "Build with $(nproc) thread(s)"
make -j$(nproc)
./cpu_features/list_cpu_features
if [ -f ./cpu_features/list_cpu_features ]; then
./cpu_features/list_cpu_features
fi
./apps/volk-config-info --alignment
./apps/volk-config-info --avail-machines
./apps/volk-config-info --all-machines
Expand Down
82 changes: 41 additions & 41 deletions kernels/volk/volk_32fc_s32fc_multiply_32fc.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
* <b>Dispatcher Prototype</b>
* \code
* void volk_32fc_s32fc_multiply_32fc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const
* lv_32fc_t* scalar, unsigned int num_points); \endcode
*
* \b Inputs
* \li aVector: The input vector to be multiplied.
* \li scalar: The complex scalar to multiply against aVector.
* \li num_points: The number of complex values in aVector.
*
* \b Outputs
Expand All @@ -46,7 +46,7 @@
* in[ii+N/2] = lv_cmake(-real, -imag);
* }
*
* volk_32fc_s32fc_multiply_32fc(out, in, &scalar, N);
*
* printf(" mag phase | mag phase\n");
* for(unsigned int ii = 0; ii < N; ++ii){
Expand All @@ -73,7 +73,7 @@

static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
Expand All @@ -85,8 +85,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;

// Set up constant scalar vector
yl = _mm256_set1_ps(lv_creal(scalar));
yh = _mm256_set1_ps(lv_cimag(scalar));
yl = _mm256_set1_ps(lv_creal(*scalar));
yh = _mm256_set1_ps(lv_cimag(*scalar));

for (; number < quarterPoints; number++) {
x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
Expand All @@ -107,7 +107,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
}

for (i = num_points - isodd; i < num_points; i++) {
*c++ = (*a++) * scalar;
*c++ = (*a++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX && LV_HAVE_FMA */
Expand All @@ -117,7 +117,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,

static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
Expand All @@ -129,8 +129,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;

// Set up constant scalar vector
yl = _mm256_set1_ps(lv_creal(scalar));
yh = _mm256_set1_ps(lv_cimag(scalar));
yl = _mm256_set1_ps(lv_creal(*scalar));
yh = _mm256_set1_ps(lv_cimag(*scalar));

for (; number < quarterPoints; number++) {
x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
Expand All @@ -151,7 +151,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
}

for (i = num_points - isodd; i < num_points; i++) {
*c++ = (*a++) * scalar;
*c++ = (*a++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX */
Expand All @@ -161,7 +161,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,

static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
Expand All @@ -172,8 +172,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;

// Set up constant scalar vector
yl = _mm_set_ps1(lv_creal(scalar));
yh = _mm_set_ps1(lv_cimag(scalar));
yl = _mm_set_ps1(lv_creal(*scalar));
yh = _mm_set_ps1(lv_cimag(*scalar));

for (; number < halfPoints; number++) {

Expand All @@ -195,7 +195,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
}

if ((num_points % 2) != 0) {
*c = (*a) * scalar;
*c = (*a) * (*scalar);
}
}
#endif /* LV_HAVE_SSE */
Expand All @@ -204,7 +204,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,

/*
 * Generic (portable) implementation: multiply each complex element of
 * aVector by the complex scalar, writing results to cVector.
 *
 * The scalar is passed by pointer (const lv_32fc_t*) rather than by value:
 * this avoids undefined behaviour arising from the ABI incompatibility
 * between C `_Complex float` and C++ `std::complex<float>` when kernels are
 * compiled in one language and called from the other.
 *
 * \param cVector    Output buffer; receives num_points complex products.
 * \param aVector    Input buffer of num_points complex values.
 * \param scalar     Pointer to the complex scalar multiplier.
 * \param num_points Number of complex values to process.
 */
static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector,
                                                         const lv_32fc_t* aVector,
                                                         const lv_32fc_t* scalar,
                                                         unsigned int num_points)
{
    lv_32fc_t* cPtr = cVector;
    const lv_32fc_t* aPtr = aVector;
    unsigned int number = num_points;

    // Manually unrolled by 8 to reduce loop overhead in this scalar fallback.
    while (number >= 8) {
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        number -= 8;
    }

    // Handle the remaining 0-7 tail elements.
    while (number-- > 0)
        *cPtr++ = *aPtr++ * (*scalar);
}
#endif /* LV_HAVE_GENERIC */

Expand All @@ -245,7 +245,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector,

static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
Expand All @@ -257,8 +257,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;

// Set up constant scalar vector
yl = _mm256_set1_ps(lv_creal(scalar));
yh = _mm256_set1_ps(lv_cimag(scalar));
yl = _mm256_set1_ps(lv_creal(*scalar));
yh = _mm256_set1_ps(lv_cimag(*scalar));

for (; number < quarterPoints; number++) {
x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
Expand All @@ -279,7 +279,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
}

for (i = num_points - isodd; i < num_points; i++) {
*c++ = (*a++) * scalar;
*c++ = (*a++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX && LV_HAVE_FMA */
Expand All @@ -290,7 +290,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,

static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
Expand All @@ -302,8 +302,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;

// Set up constant scalar vector
yl = _mm256_set1_ps(lv_creal(scalar));
yh = _mm256_set1_ps(lv_cimag(scalar));
yl = _mm256_set1_ps(lv_creal(*scalar));
yh = _mm256_set1_ps(lv_cimag(*scalar));

for (; number < quarterPoints; number++) {
x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
Expand All @@ -324,7 +324,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
}

for (i = num_points - isodd; i < num_points; i++) {
*c++ = (*a++) * scalar;
*c++ = (*a++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX */
Expand All @@ -334,7 +334,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,

static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
Expand All @@ -345,8 +345,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;

// Set up constant scalar vector
yl = _mm_set_ps1(lv_creal(scalar));
yh = _mm_set_ps1(lv_cimag(scalar));
yl = _mm_set_ps1(lv_creal(*scalar));
yh = _mm_set_ps1(lv_cimag(*scalar));

for (; number < halfPoints; number++) {

Expand All @@ -368,7 +368,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
}

if ((num_points % 2) != 0) {
*c = (*a) * scalar;
*c = (*a) * (*scalar);
}
}
#endif /* LV_HAVE_SSE */
Expand All @@ -378,7 +378,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,

static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
lv_32fc_t* cPtr = cVector;
Expand All @@ -389,8 +389,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
float32x4x2_t a_val, scalar_val;
float32x4x2_t tmp_imag;

scalar_val.val[0] = vld1q_dup_f32((const float*)&scalar);
scalar_val.val[1] = vld1q_dup_f32(((const float*)&scalar) + 1);
scalar_val.val[0] = vld1q_dup_f32((const float*)scalar);
scalar_val.val[1] = vld1q_dup_f32(((const float*)scalar) + 1);
for (number = 0; number < quarter_points; ++number) {
a_val = vld2q_f32((float*)aPtr);
tmp_imag.val[1] = vmulq_f32(a_val.val[1], scalar_val.val[0]);
Expand All @@ -405,7 +405,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
}

for (number = quarter_points * 4; number < num_points; number++) {
*cPtr++ = *aPtr++ * scalar;
*cPtr++ = *aPtr++ * (*scalar);
}
}
#endif /* LV_HAVE_NEON */
Expand Down
Loading

0 comments on commit 7e20f80

Please sign in to comment.