diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index f0581eea..0b79cb82 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -114,7 +114,6 @@ jobs:
           submodules: 'recursive'
       - uses: uraimo/run-on-arch-action@v2.5.0
         name: Build in non-x86 container
-        continue-on-error: ${{ contains(fromJson('["ppc64le", "s390x"]'), matrix.arch) }}
         id: build
         with:
           arch: ${{ matrix.arch }}
@@ -153,7 +152,9 @@ jobs:
           cmake -DCMAKE_CXX_FLAGS="-Werror" -DBUILD_EXECUTABLE=ON ..
           echo "Build with $(nproc) thread(s)"
           make -j$(nproc)
-          ./cpu_features/list_cpu_features
+          if [ -f ./cpu_features/list_cpu_features ]; then
+            ./cpu_features/list_cpu_features
+          fi
           ./apps/volk-config-info --alignment
           ./apps/volk-config-info --avail-machines
           ./apps/volk-config-info --all-machines
diff --git a/kernels/volk/volk_32fc_s32fc_multiply_32fc.h b/kernels/volk/volk_32fc_s32fc_multiply_32fc.h
index 1593b7cb..5a29712f 100644
--- a/kernels/volk/volk_32fc_s32fc_multiply_32fc.h
+++ b/kernels/volk/volk_32fc_s32fc_multiply_32fc.h
@@ -18,11 +18,11 @@
  * Dispatcher Prototype
  * \code
  * void volk_32fc_s32fc_multiply_32fc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const
- * lv_32fc_t scalar, unsigned int num_points); \endcode
+ * lv_32fc_t* scalar, unsigned int num_points); \endcode
  *
  * \b Inputs
  * \li aVector: The input vector to be multiplied.
- * \li scalar The complex scalar to multiply against aVector.
+ * \li scalar: The complex scalar to multiply against aVector.
  * \li num_points: The number of complex values in aVector.
  *
  * \b Outputs
@@ -46,7 +46,7 @@
  *       in[ii+N/2] = lv_cmake(-real, -imag);
  *   }
  *
- *   volk_32fc_s32fc_multiply_32fc(out, in, scalar, N);
+ *   volk_32fc_s32fc_multiply_32fc(out, in, &scalar, N);
  *
  *   printf(" mag phase | mag phase\n");
  *   for(unsigned int ii = 0; ii < N; ++ii){
@@ -73,7 +73,7 @@
 
 static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
                                                            const lv_32fc_t* aVector,
-                                                           const lv_32fc_t scalar,
+                                                           const lv_32fc_t* scalar,
                                                            unsigned int num_points)
 {
     unsigned int number = 0;
@@ -85,8 +85,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
     const lv_32fc_t* a = aVector;
 
     // Set up constant scalar vector
-    yl = _mm256_set1_ps(lv_creal(scalar));
-    yh = _mm256_set1_ps(lv_cimag(scalar));
+    yl = _mm256_set1_ps(lv_creal(*scalar));
+    yh = _mm256_set1_ps(lv_cimag(*scalar));
 
     for (; number < quarterPoints; number++) {
         x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
@@ -107,7 +107,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
     }
 
     for (i = num_points - isodd; i < num_points; i++) {
-        *c++ = (*a++) * scalar;
+        *c++ = (*a++) * (*scalar);
     }
 }
 #endif /* LV_HAVE_AVX && LV_HAVE_FMA */
@@ -117,7 +117,7 @@
 
 static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
                                                        const lv_32fc_t* aVector,
-                                                       const lv_32fc_t scalar,
+                                                       const lv_32fc_t* scalar,
                                                        unsigned int num_points)
 {
     unsigned int number = 0;
@@ -129,8 +129,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
     const lv_32fc_t* a = aVector;
 
     // Set up constant scalar vector
-    yl = _mm256_set1_ps(lv_creal(scalar));
-    yh = _mm256_set1_ps(lv_cimag(scalar));
+    yl = _mm256_set1_ps(lv_creal(*scalar));
+    yh = _mm256_set1_ps(lv_cimag(*scalar));
 
     for (; number < quarterPoints; number++) {
         x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
@@ -151,7 +151,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
     }
 
     for (i = num_points - isodd; i < num_points; i++) {
-        *c++ = (*a++) * scalar;
+        *c++ = (*a++) * (*scalar);
     }
 }
 #endif /* LV_HAVE_AVX */
@@ -161,7 +161,7 @@
 
 static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
                                                         const lv_32fc_t* aVector,
-                                                        const lv_32fc_t scalar,
+                                                        const lv_32fc_t* scalar,
                                                         unsigned int num_points)
 {
     unsigned int number = 0;
@@ -172,8 +172,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
     const lv_32fc_t* a = aVector;
 
     // Set up constant scalar vector
-    yl = _mm_set_ps1(lv_creal(scalar));
-    yh = _mm_set_ps1(lv_cimag(scalar));
+    yl = _mm_set_ps1(lv_creal(*scalar));
+    yh = _mm_set_ps1(lv_cimag(*scalar));
 
     for (; number < halfPoints; number++) {
@@ -195,7 +195,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
     }
 
     if ((num_points % 2) != 0) {
-        *c = (*a) * scalar;
+        *c = (*a) * (*scalar);
     }
 }
 #endif /* LV_HAVE_SSE */
@@ -204,7 +204,7 @@
 
 static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector,
                                                          const lv_32fc_t* aVector,
-                                                         const lv_32fc_t scalar,
+                                                         const lv_32fc_t* scalar,
                                                          unsigned int num_points)
 {
     lv_32fc_t* cPtr = cVector;
@@ -213,20 +213,20 @@ static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector,
 
     // unwrap loop
     while (number >= 8) {
-        *cPtr++ = (*aPtr++) * scalar;
-        *cPtr++ = (*aPtr++) * scalar;
-        *cPtr++ = (*aPtr++) * scalar;
-        *cPtr++ = (*aPtr++) * scalar;
-        *cPtr++ = (*aPtr++) * scalar;
-        *cPtr++ = (*aPtr++) * scalar;
-        *cPtr++ = (*aPtr++) * scalar;
-        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) * (*scalar);
         number -= 8;
     }
 
     // clean up any remaining
     while (number-- > 0)
-        *cPtr++ = *aPtr++ * scalar;
+        *cPtr++ = *aPtr++ * (*scalar);
 }
 
 #endif /* LV_HAVE_GENERIC */
@@ -245,7 +245,7 @@
 
 static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
                                                            const lv_32fc_t* aVector,
-                                                           const lv_32fc_t scalar,
+                                                           const lv_32fc_t* scalar,
                                                            unsigned int num_points)
 {
     unsigned int number = 0;
@@ -257,8 +257,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
     const lv_32fc_t* a = aVector;
 
     // Set up constant scalar vector
-    yl = _mm256_set1_ps(lv_creal(scalar));
-    yh = _mm256_set1_ps(lv_cimag(scalar));
+    yl = _mm256_set1_ps(lv_creal(*scalar));
+    yh = _mm256_set1_ps(lv_cimag(*scalar));
 
     for (; number < quarterPoints; number++) {
         x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
@@ -279,7 +279,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
     }
 
     for (i = num_points - isodd; i < num_points; i++) {
-        *c++ = (*a++) * scalar;
+        *c++ = (*a++) * (*scalar);
     }
 }
 #endif /* LV_HAVE_AVX && LV_HAVE_FMA */
@@ -290,7 +290,7 @@
 
 static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
                                                        const lv_32fc_t* aVector,
-                                                       const lv_32fc_t scalar,
+                                                       const lv_32fc_t* scalar,
                                                        unsigned int num_points)
 {
     unsigned int number = 0;
@@ -302,8 +302,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
     const lv_32fc_t* a = aVector;
 
     // Set up constant scalar vector
-    yl = _mm256_set1_ps(lv_creal(scalar));
-    yh = _mm256_set1_ps(lv_cimag(scalar));
+    yl = _mm256_set1_ps(lv_creal(*scalar));
+    yh = _mm256_set1_ps(lv_cimag(*scalar));
 
     for (; number < quarterPoints; number++) {
         x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
@@ -324,7 +324,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
     }
 
     for (i = num_points - isodd; i < num_points; i++) {
-        *c++ = (*a++) * scalar;
+        *c++ = (*a++) * (*scalar);
     }
 }
 #endif /* LV_HAVE_AVX */
@@ -334,7 +334,7 @@
 
 static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
                                                         const lv_32fc_t* aVector,
-                                                        const lv_32fc_t scalar,
+                                                        const lv_32fc_t* scalar,
                                                         unsigned int num_points)
 {
     unsigned int number = 0;
@@ -345,8 +345,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
     const lv_32fc_t* a = aVector;
 
     // Set up constant scalar vector
-    yl = _mm_set_ps1(lv_creal(scalar));
-    yh = _mm_set_ps1(lv_cimag(scalar));
+    yl = _mm_set_ps1(lv_creal(*scalar));
+    yh = _mm_set_ps1(lv_cimag(*scalar));
 
     for (; number < halfPoints; number++) {
@@ -368,7 +368,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
     }
 
     if ((num_points % 2) != 0) {
-        *c = (*a) * scalar;
+        *c = (*a) * (*scalar);
     }
 }
 #endif /* LV_HAVE_SSE */
@@ -378,7 +378,7 @@
 
 static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
                                                       const lv_32fc_t* aVector,
-                                                      const lv_32fc_t scalar,
+                                                      const lv_32fc_t* scalar,
                                                       unsigned int num_points)
 {
     lv_32fc_t* cPtr = cVector;
@@ -389,8 +389,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
     float32x4x2_t a_val, scalar_val;
     float32x4x2_t tmp_imag;
 
-    scalar_val.val[0] = vld1q_dup_f32((const float*)&scalar);
-    scalar_val.val[1] = vld1q_dup_f32(((const float*)&scalar) + 1);
+    scalar_val.val[0] = vld1q_dup_f32((const float*)scalar);
+    scalar_val.val[1] = vld1q_dup_f32(((const float*)scalar) + 1);
     for (number = 0; number < quarter_points; ++number) {
         a_val = vld2q_f32((float*)aPtr);
         tmp_imag.val[1] = vmulq_f32(a_val.val[1], scalar_val.val[0]);
@@ -405,7 +405,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
     }
 
     for (number = quarter_points * 4; number < num_points; number++) {
-        *cPtr++ = *aPtr++ * scalar;
+        *cPtr++ = *aPtr++ * (*scalar);
     }
 }
 #endif /* LV_HAVE_NEON */
diff --git a/kernels/volk/volk_32fc_s32fc_rotatorpuppet_32fc.h b/kernels/volk/volk_32fc_s32fc_rotatorpuppet_32fc.h
index e328a311..d79761a7 100644
--- a/kernels/volk/volk_32fc_s32fc_rotatorpuppet_32fc.h
+++ b/kernels/volk/volk_32fc_s32fc_rotatorpuppet_32fc.h
@@ -21,15 +21,15 @@
 
 static inline void volk_32fc_s32fc_rotatorpuppet_32fc_generic(lv_32fc_t* outVector,
                                                               const lv_32fc_t* inVector,
-                                                              const lv_32fc_t phase_inc,
+                                                              const lv_32fc_t* phase_inc,
                                                               unsigned int num_points)
 {
     lv_32fc_t phase[1] = { lv_cmake(.3f, 0.95393f) };
     (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
     const lv_32fc_t phase_inc_n =
-        phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+        *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
     volk_32fc_s32fc_x2_rotator_32fc_generic(
-        outVector, inVector, phase_inc_n, phase, num_points);
+        outVector, inVector, &phase_inc_n, phase, num_points);
 }
 
 #endif /* LV_HAVE_GENERIC */
@@ -41,15 +41,15 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_generic(lv_32fc_t* outVect
 
 static inline void volk_32fc_s32fc_rotatorpuppet_32fc_neon(lv_32fc_t* outVector,
                                                            const lv_32fc_t* inVector,
-                                                           const lv_32fc_t phase_inc,
+                                                           const lv_32fc_t* phase_inc,
                                                            unsigned int num_points)
 {
     lv_32fc_t phase[1] = { lv_cmake(.3f, 0.95393f) };
     (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
     const lv_32fc_t phase_inc_n =
-        phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+        *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
     volk_32fc_s32fc_x2_rotator_32fc_neon(
-        outVector, inVector, phase_inc_n, phase, num_points);
+        outVector, inVector, &phase_inc_n, phase, num_points);
 }
 
 #endif /* LV_HAVE_NEON */
@@ -60,15 +60,15 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_neon(lv_32fc_t* outVector,
 
 static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_sse4_1(lv_32fc_t* outVector,
                                                                const lv_32fc_t* inVector,
-                                                               const lv_32fc_t phase_inc,
+                                                               const lv_32fc_t* phase_inc,
                                                                unsigned int num_points)
 {
     lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) };
     (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
     const lv_32fc_t phase_inc_n =
-        phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+        *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
     volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(
-        outVector, inVector, phase_inc_n, phase, num_points);
+        outVector, inVector, &phase_inc_n, phase, num_points);
 }
 
 #endif /* LV_HAVE_SSE4_1 */
@@ -78,15 +78,15 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_sse4_1(lv_32fc_t* outVec
 #include <smmintrin.h>
 
 static inline void volk_32fc_s32fc_rotatorpuppet_32fc_u_sse4_1(lv_32fc_t* outVector,
                                                                const lv_32fc_t* inVector,
-                                                               const lv_32fc_t phase_inc,
+                                                               const lv_32fc_t* phase_inc,
                                                                unsigned int num_points)
 {
     lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) };
     (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
     const lv_32fc_t phase_inc_n =
-        phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+        *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
     volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(
-        outVector, inVector, phase_inc_n, phase, num_points);
+        outVector, inVector, &phase_inc_n, phase, num_points);
 }
 #endif /* LV_HAVE_SSE4_1 */
@@ -97,15 +97,15 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_u_sse4_1(lv_32fc_t* outVec
 
 static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_avx(lv_32fc_t* outVector,
                                                             const lv_32fc_t* inVector,
-                                                            const lv_32fc_t phase_inc,
+                                                            const lv_32fc_t* phase_inc,
                                                             unsigned int num_points)
 {
     lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) };
     (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
     const lv_32fc_t phase_inc_n =
-        phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+        *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
     volk_32fc_s32fc_x2_rotator_32fc_a_avx(
-        outVector, inVector, phase_inc_n, phase, num_points);
+        outVector, inVector, &phase_inc_n, phase, num_points);
 }
 
 #endif /* LV_HAVE_AVX */
@@ -116,15 +116,15 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_avx(lv_32fc_t* outVector
 
 static inline void volk_32fc_s32fc_rotatorpuppet_32fc_u_avx(lv_32fc_t* outVector,
                                                             const lv_32fc_t* inVector,
-                                                            const lv_32fc_t phase_inc,
+                                                            const lv_32fc_t* phase_inc,
                                                             unsigned int num_points)
 {
     lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) };
     (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
     const lv_32fc_t phase_inc_n =
-        phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+        *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
     volk_32fc_s32fc_x2_rotator_32fc_u_avx(
-        outVector, inVector, phase_inc_n, phase, num_points);
+        outVector, inVector, &phase_inc_n, phase, num_points);
 }
 
 #endif /* LV_HAVE_AVX */
@@ -132,17 +132,18 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_u_avx(lv_32fc_t* outVector
 #if LV_HAVE_AVX && LV_HAVE_FMA
 #include <immintrin.h>
 
-static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_avx_fma(lv_32fc_t* outVector,
-                                                                const lv_32fc_t* inVector,
-                                                                const lv_32fc_t phase_inc,
-                                                                unsigned int num_points)
+static inline void
+volk_32fc_s32fc_rotatorpuppet_32fc_a_avx_fma(lv_32fc_t* outVector,
+                                             const lv_32fc_t* inVector,
+                                             const lv_32fc_t* phase_inc,
+                                             unsigned int num_points)
 {
     lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) };
     (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
     const lv_32fc_t phase_inc_n =
-        phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+        *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
     volk_32fc_s32fc_x2_rotator_32fc_a_avx_fma(
-        outVector, inVector, phase_inc_n, phase, num_points);
+        outVector, inVector, &phase_inc_n, phase, num_points);
 }
 #endif /* LV_HAVE_AVX && LV_HAVE_FMA*/
@@ -151,17 +152,18 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_avx_fma(lv_32fc_t* outVe
 #if LV_HAVE_AVX && LV_HAVE_FMA
 #include <immintrin.h>
 
-static inline void volk_32fc_s32fc_rotatorpuppet_32fc_u_avx_fma(lv_32fc_t* outVector,
-                                                                const lv_32fc_t* inVector,
-                                                                const lv_32fc_t phase_inc,
-                                                                unsigned int num_points)
+static inline void
+volk_32fc_s32fc_rotatorpuppet_32fc_u_avx_fma(lv_32fc_t* outVector,
+                                             const lv_32fc_t* inVector,
+                                             const lv_32fc_t* phase_inc,
+                                             unsigned int num_points)
 {
     lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) };
     (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
     const lv_32fc_t phase_inc_n =
-        phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+        *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
     volk_32fc_s32fc_x2_rotator_32fc_u_avx_fma(
-        outVector, inVector, phase_inc_n, phase, num_points);
+        outVector, inVector, &phase_inc_n, phase, num_points);
 }
 #endif /* LV_HAVE_AVX && LV_HAVE_FMA*/
diff --git a/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h b/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h
index aebf46d5..e3e9be0b 100644
--- a/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h
+++ b/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h
@@ -18,7 +18,7 @@
  * Dispatcher Prototype
  * \code
  * void volk_32fc_s32fc_x2_rotator_32fc(lv_32fc_t* outVector, const lv_32fc_t* inVector,
- * const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) \endcode
+ * const lv_32fc_t* phase_inc, lv_32fc_t* phase, unsigned int num_points) \endcode
  *
  * \b Inputs
  * \li inVector: Vector to be rotated.
@@ -82,7 +82,7 @@
 
 static inline void volk_32fc_s32fc_x2_rotator_32fc_generic(lv_32fc_t* outVector,
                                                            const lv_32fc_t* inVector,
-                                                           const lv_32fc_t phase_inc,
+                                                           const lv_32fc_t* phase_inc,
                                                            lv_32fc_t* phase,
                                                            unsigned int num_points)
 {
@@ -91,14 +91,14 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_generic(lv_32fc_t* outVector,
     for (i = 0; i < (unsigned int)(num_points / ROTATOR_RELOAD); ++i) {
         for (j = 0; j < ROTATOR_RELOAD; ++j) {
             *outVector++ = *inVector++ * (*phase);
-            (*phase) *= phase_inc;
+            (*phase) *= *phase_inc;
         }
 
         (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
     }
     for (i = 0; i < num_points % ROTATOR_RELOAD; ++i) {
         *outVector++ = *inVector++ * (*phase);
-        (*phase) *= phase_inc;
+        (*phase) *= *phase_inc;
     }
     if (i) {
         // Make sure, we normalize phase on every call!
@@ -115,7 +115,7 @@
 
 static inline void volk_32fc_s32fc_x2_rotator_32fc_neon(lv_32fc_t* outVector,
                                                         const lv_32fc_t* inVector,
-                                                        const lv_32fc_t phase_inc,
+                                                        const lv_32fc_t* phase_inc,
                                                         lv_32fc_t* phase,
                                                         unsigned int num_points)
 
@@ -132,7 +132,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_neon(lv_32fc_t* outVector,
 
     for (i = 0; i < 4; ++i) {
         phasePtr[i] *= incr;
-        incr *= (phase_inc);
+        incr *= (*phase_inc);
     }
 
     // Notice that incr has be incremented in the previous loop
@@ -194,7 +194,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_neon(lv_32fc_t* outVector,
     // Deal with the rest
     for (i = 0; i < num_points % 4; i++) {
         *outputVectorPtr++ = *inputVectorPtr++ * phasePtr[0];
-        phasePtr[0] *= (phase_inc);
+        phasePtr[0] *= (*phase_inc);
     }
 
     // For continuous phase next time we need to call this function
@@ -209,7 +209,7 @@
 
 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector,
                                                             const lv_32fc_t* inVector,
-                                                            const lv_32fc_t phase_inc,
+                                                            const lv_32fc_t* phase_inc,
                                                             lv_32fc_t* phase,
                                                             unsigned int num_points)
 {
@@ -222,7 +222,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector
 
     for (i = 0; i < 2; ++i) {
         phase_Ptr[i] *= incr;
-        incr *= (phase_inc);
+        incr *= (*phase_inc);
     }
 
     __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
@@ -298,7 +298,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector
     _mm_storeu_ps((float*)phase_Ptr, phase_Val);
     if (num_points & 1) {
         *cPtr++ = *aPtr++ * phase_Ptr[0];
-        phase_Ptr[0] *= (phase_inc);
+        phase_Ptr[0] *= (*phase_inc);
     }
 
     (*phase) = phase_Ptr[0];
@@ -312,7 +312,7 @@
 
 static inline void volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(lv_32fc_t* outVector,
                                                             const lv_32fc_t* inVector,
-                                                            const lv_32fc_t phase_inc,
+                                                            const lv_32fc_t* phase_inc,
                                                             lv_32fc_t* phase,
                                                             unsigned int num_points)
 {
@@ -325,7 +325,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(lv_32fc_t* outVector
 
     for (i = 0; i < 2; ++i) {
         phase_Ptr[i] *= incr;
-        incr *= (phase_inc);
+        incr *= (*phase_inc);
     }
 
     /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
@@ -404,7 +404,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(lv_32fc_t* outVector
     _mm_storeu_ps((float*)phase_Ptr, phase_Val);
     if (num_points & 1) {
         *cPtr++ = *aPtr++ * phase_Ptr[0];
-        phase_Ptr[0] *= (phase_inc);
+        phase_Ptr[0] *= (*phase_inc);
     }
 
     (*phase) = phase_Ptr[0];
@@ -419,7 +419,7 @@
 
 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector,
                                                          const lv_32fc_t* inVector,
-                                                         const lv_32fc_t phase_inc,
+                                                         const lv_32fc_t* phase_inc,
                                                          lv_32fc_t* phase,
                                                          unsigned int num_points)
 {
@@ -432,7 +432,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector,
 
     for (i = 0; i < 4; ++i) {
         phase_Ptr[i] *= incr;
-        incr *= (phase_inc);
+        incr *= (*phase_inc);
     }
 
     __m256 aVal, phase_Val, z;
@@ -493,7 +493,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector,
 
 static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx(lv_32fc_t* outVector,
                                                          const lv_32fc_t* inVector,
-                                                         const lv_32fc_t phase_inc,
+                                                         const lv_32fc_t* phase_inc,
                                                          lv_32fc_t* phase,
                                                          unsigned int num_points)
 {
@@ -506,7 +506,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx(lv_32fc_t* outVector,
 
     for (i = 0; i < 4; ++i) {
         phase_Ptr[i] *= incr;
-        incr *= (phase_inc);
+        incr *= (*phase_inc);
     }
 
     __m256 aVal, phase_Val, z;
@@ -565,7 +565,7 @@
 
 static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx_fma(lv_32fc_t* outVector,
                                                              const lv_32fc_t* inVector,
-                                                             const lv_32fc_t phase_inc,
+                                                             const lv_32fc_t* phase_inc,
                                                              lv_32fc_t* phase,
                                                              unsigned int num_points)
 {
@@ -579,7 +579,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx_fma(lv_32fc_t* outVecto
 
     for (i = 0; i < 4; ++i) {
         phase_Ptr[i] *= incr;
-        incr *= (phase_inc);
+        incr *= (*phase_inc);
     }
 
     __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
@@ -661,7 +661,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx_fma(lv_32fc_t* outVecto
     _mm256_store_ps((float*)phase_Ptr, phase_Val);
     for (i = 0; i < num_points % 4; ++i) {
         *cPtr++ = *aPtr++ * phase_Ptr[0];
-        phase_Ptr[0] *= (phase_inc);
+        phase_Ptr[0] *= (*phase_inc);
     }
 
     (*phase) = phase_Ptr[0];
@@ -674,7 +674,7 @@
 
 static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx_fma(lv_32fc_t* outVector,
                                                              const lv_32fc_t* inVector,
-                                                             const lv_32fc_t phase_inc,
+                                                             const lv_32fc_t* phase_inc,
                                                              lv_32fc_t* phase,
                                                              unsigned int num_points)
 {
@@ -687,7 +687,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx_fma(lv_32fc_t* outVecto
 
     for (i = 0; i < 4; ++i) {
         phase_Ptr[i] *= incr;
-        incr *= (phase_inc);
+        incr *= (*phase_inc);
    }
 
     __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
@@ -769,7 +769,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx_fma(lv_32fc_t* outVecto
     _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
     for (i = 0; i < num_points % 4; ++i) {
         *cPtr++ = *aPtr++ * phase_Ptr[0];
-        phase_Ptr[0] *= (phase_inc);
+        phase_Ptr[0] *= (*phase_inc);
     }
 
     (*phase) = phase_Ptr[0];
diff --git a/kernels/volk/volk_32fc_x2_s32fc_multiply_conjugate_add_32fc.h b/kernels/volk/volk_32fc_x2_s32fc_multiply_conjugate_add_32fc.h
index 85cdaf16..70a0b31f 100644
--- a/kernels/volk/volk_32fc_x2_s32fc_multiply_conjugate_add_32fc.h
+++ b/kernels/volk/volk_32fc_x2_s32fc_multiply_conjugate_add_32fc.h
@@ -15,12 +15,12 @@
  * Conjugate the input complex vector, multiply them by a complex scalar,
  * add the another input complex vector and returns the results.
  *
- * c[i] = a[i] + conj(b[i]) * scalar
+ * c[i] = a[i] + conj(b[i]) * (*scalar)
  *
  * Dispatcher Prototype
  * \code
  * void volk_32fc_x2_s32fc_multiply_conjugate_add_32fc(lv_32fc_t* cVector, const
- * lv_32fc_t* aVector, const lv_32fc_t* bVector, const lv_32fc_t scalar, unsigned int
+ * lv_32fc_t* aVector, const lv_32fc_t* bVector, const lv_32fc_t* scalar, unsigned int
 * num_points); \endcode
 *
 * \b Inputs
@@ -58,7 +58,7 @@
  *     // update weight using output.
  *     float real = lv_creal(output) * (1.0 - std::norm(output)) * MU;
  *     lv_32fc_t factor = lv_cmake(real, 0.f);
- *     volk_32fc_x2_s32fc_multiply_conjugate_add_32fc(next, weight, state, factor, n_filter);
+ *     volk_32fc_x2_s32fc_multiply_conjugate_add_32fc(next, weight, state, &factor, n_filter);
  *     lv_32fc_t *tmp = next;
  *     next = weight;
  *     weight = tmp;
@@ -85,7 +85,7 @@ static inline void
 volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_generic(lv_32fc_t* cVector,
                                                        const lv_32fc_t* aVector,
                                                        const lv_32fc_t* bVector,
-                                                       const lv_32fc_t scalar,
+                                                       const lv_32fc_t* scalar,
                                                        unsigned int num_points)
 {
     const lv_32fc_t* aPtr = aVector;
@@ -95,20 +95,20 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_generic(lv_32fc_t* cVector,
 
     // unwrap loop
     while (number >= 8) {
-        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
-        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
-        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
-        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
-        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
-        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
-        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
-        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
+        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
         number -= 8;
     }
 
     // clean up any remaining
     while (number-- > 0) {
-        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
+        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
     }
 }
 #endif /* LV_HAVE_GENERIC */
@@ -122,7 +122,7 @@ static inline void
 volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_u_avx(lv_32fc_t* cVector,
                                                      const lv_32fc_t* aVector,
                                                      const lv_32fc_t* bVector,
-                                                     const lv_32fc_t scalar,
+                                                     const lv_32fc_t* scalar,
                                                      unsigned int num_points)
 {
     unsigned int number = 0;
@@ -131,7 +131,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_u_avx(lv_32fc_t* cVector,
     unsigned int isodd = num_points & 3;
 
     __m256 x, y, s, z;
-    lv_32fc_t v_scalar[4] = { scalar, scalar, scalar, scalar };
+    lv_32fc_t v_scalar[4] = { *scalar, *scalar, *scalar, *scalar };
 
     const lv_32fc_t* a = aVector;
     const lv_32fc_t* b = bVector;
@@ -153,7 +153,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_u_avx(lv_32fc_t* cVector,
     }
 
     for (i = num_points - isodd; i < num_points; i++) {
-        *c++ = (*a++) + lv_conj(*b++) * scalar;
+        *c++ = (*a++) + lv_conj(*b++) * (*scalar);
     }
 }
 #endif /* LV_HAVE_AVX */
@@ -167,14 +167,14 @@ static inline void
 volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_u_sse3(lv_32fc_t* cVector,
                                                       const lv_32fc_t* aVector,
                                                       const lv_32fc_t* bVector,
-                                                      const lv_32fc_t scalar,
+                                                      const lv_32fc_t* scalar,
                                                       unsigned int num_points)
 {
     unsigned int number = 0;
     const unsigned int halfPoints = num_points / 2;
 
     __m128 x, y, s, z;
-    lv_32fc_t v_scalar[2] = { scalar, scalar };
+    lv_32fc_t v_scalar[2] = { *scalar, *scalar };
 
     const lv_32fc_t* a = aVector;
     const lv_32fc_t* b = bVector;
@@ -196,7 +196,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_u_sse3(lv_32fc_t* cVector,
     }
 
     if ((num_points % 2) != 0) {
-        *c = *a + lv_conj(*b) * scalar;
+        *c = *a + lv_conj(*b) * (*scalar);
     }
 }
 #endif /* LV_HAVE_SSE */
@@ -210,7 +210,7 @@ static inline void
 volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_a_avx(lv_32fc_t* cVector,
                                                      const lv_32fc_t* aVector,
                                                      const lv_32fc_t* bVector,
-                                                     const lv_32fc_t scalar,
+                                                     const lv_32fc_t* scalar,
                                                      unsigned int num_points)
 {
     unsigned int number = 0;
@@ -219,7 +219,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_a_avx(lv_32fc_t* cVector,
     unsigned int isodd = num_points & 3;
 
     __m256 x, y, s, z;
-    lv_32fc_t v_scalar[4] = { scalar, scalar, scalar, scalar };
+    lv_32fc_t v_scalar[4] = { *scalar, *scalar, *scalar, *scalar };
 
     const lv_32fc_t* a = aVector;
     const lv_32fc_t* b = bVector;
@@ -241,7 +241,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_a_avx(lv_32fc_t* cVector,
     }
 
     for (i = num_points - isodd; i < num_points; i++) {
-        *c++ = (*a++) + lv_conj(*b++) * scalar;
+        *c++ = (*a++) + lv_conj(*b++) * (*scalar);
     }
 }
 #endif /* LV_HAVE_AVX */
@@ -255,14 +255,14 @@ static inline void
 volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_a_sse3(lv_32fc_t* cVector,
                                                       const lv_32fc_t* aVector,
                                                       const lv_32fc_t* bVector,
-                                                      const lv_32fc_t scalar,
+                                                      const lv_32fc_t* scalar,
                                                       unsigned int num_points)
 {
     unsigned int number = 0;
     const unsigned int halfPoints = num_points / 2;
 
     __m128 x, y, s, z;
-    lv_32fc_t v_scalar[2] = { scalar, scalar };
+    lv_32fc_t v_scalar[2] = { *scalar, *scalar };
 
     const lv_32fc_t* a = aVector;
     const lv_32fc_t* b = bVector;
@@ -284,7 +284,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_a_sse3(lv_32fc_t* cVector,
     }
 
     if ((num_points % 2) != 0) {
-        *c = *a + lv_conj(*b) * scalar;
+        *c = *a + lv_conj(*b) * (*scalar);
     }
 }
 #endif /* LV_HAVE_SSE */
@@ -297,7 +297,7 @@ static inline void
 volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_neon(lv_32fc_t* cVector,
                                                     const lv_32fc_t* aVector,
                                                     const lv_32fc_t* bVector,
-                                                    const lv_32fc_t scalar,
+                                                    const lv_32fc_t* scalar,
                                                     unsigned int num_points)
 {
     const lv_32fc_t* bPtr = bVector;
@@ -309,8 +309,8 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_neon(lv_32fc_t* cVector,
     float32x4x2_t a_val, b_val, c_val, scalar_val;
     float32x4x2_t tmp_val;
 
-    scalar_val.val[0] = vld1q_dup_f32((const float*)&scalar);
-    scalar_val.val[1] = vld1q_dup_f32(((const float*)&scalar) + 1);
+    scalar_val.val[0] = vld1q_dup_f32((const float*)scalar);
+    scalar_val.val[1] = vld1q_dup_f32(((const float*)scalar) + 1);
 
     for (number = 0; number < quarter_points; ++number) {
         a_val = vld2q_f32((float*)aPtr);
@@ -336,7 +336,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_neon(lv_32fc_t* cVector,
     }
 
     for (number = quarter_points * 4; number < num_points; number++) {
-        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
+        *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
     }
 }
 #endif /* LV_HAVE_NEON */
diff --git a/lib/qa_utils.cc b/lib/qa_utils.cc
index 4be7b8ad..a94d895c 100644
--- a/lib/qa_utils.cc
+++ b/lib/qa_utils.cc
@@ -355,7 +355,7 @@ inline void run_cast_test1_s32fc(volk_fn_1arg_s32fc func,
                                  std::string arch)
 {
     while (iter--)
-        func(buffs[0], scalar, vlen, arch.c_str());
+        func(buffs[0], &scalar, vlen, arch.c_str());
 }
 
 inline void run_cast_test2_s32fc(volk_fn_2arg_s32fc func,
@@ -366,7 +366,7 @@ inline void run_cast_test2_s32fc(volk_fn_2arg_s32fc func,
                                  std::string arch)
 {
     while (iter--)
-        func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
+        func(buffs[0], buffs[1], &scalar, vlen, arch.c_str());
 }
 
 inline void run_cast_test3_s32fc(volk_fn_3arg_s32fc func,
@@ -377,7 +377,7 @@ inline void run_cast_test3_s32fc(volk_fn_3arg_s32fc func,
                                  std::string arch)
 {
     while (iter--)
-        func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
+        func(buffs[0], buffs[1], buffs[2], &scalar, vlen, arch.c_str());
 }
 
 template <class t>
diff --git a/lib/qa_utils.h b/lib/qa_utils.h
index 40f549cf..a6567720 100644
--- a/lib/qa_utils.h
+++ b/lib/qa_utils.h
@@ -199,11 +199,11 @@ typedef void (*volk_fn_2arg_s32f)(void*, void*, float, unsigned int, const char*
 typedef void (*volk_fn_3arg_s32f)(void*, void*, void*, float, unsigned int, const char*);
 typedef void (*volk_fn_1arg_s32fc)(
     void*,
-    lv_32fc_t,
+    lv_32fc_t*,
     unsigned int,
     const char*); // one input vector, one scalar float input
-typedef void (*volk_fn_2arg_s32fc)(void*, void*, lv_32fc_t, unsigned int, const char*);
+typedef void (*volk_fn_2arg_s32fc)(void*, void*, lv_32fc_t*, unsigned int, const char*);
 typedef void (*volk_fn_3arg_s32fc)(
-    void*, void*, void*, lv_32fc_t, unsigned int, const char*);
+    void*, void*, void*, lv_32fc_t*, unsigned int, const char*);
 
 #endif // VOLK_QA_UTILS_H
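
Migration note (editor's addition, not part of the patch): with this change, every s32fc kernel and the QA function-pointer types take the complex scalar by pointer, so callers must pass an address rather than a value. Passing a C complex type by value across the public ABI is not represented compatibly by all compilers, which appears to be the motivation here. Below is a minimal caller-side sketch in C, assuming an installed VOLK that already carries these pointer-based signatures; the buffer length and scalar values are illustrative only.

#include <math.h>
#include <stdio.h>
#include <volk/volk.h>

int main(void)
{
    const unsigned int N = 16; /* illustrative length */
    lv_32fc_t* in =
        (lv_32fc_t*)volk_malloc(N * sizeof(lv_32fc_t), volk_get_alignment());
    lv_32fc_t* out =
        (lv_32fc_t*)volk_malloc(N * sizeof(lv_32fc_t), volk_get_alignment());
    for (unsigned int i = 0; i < N; i++) {
        in[i] = lv_cmake((float)i, (float)-i);
    }

    /* The scalar now needs a named lvalue so its address can be taken. */
    lv_32fc_t scalar = lv_cmake(0.5f, 2.0f);
    /* Old call: volk_32fc_s32fc_multiply_32fc(out, in, scalar, N); */
    volk_32fc_s32fc_multiply_32fc(out, in, &scalar, N); /* new: pass by pointer */

    /* Same pattern for the rotator: phase_inc also moves behind a pointer,
     * while the running phase was already passed by pointer. */
    lv_32fc_t phase = lv_cmake(1.f, 0.f);
    lv_32fc_t phase_inc = lv_cmake(cosf(0.1f), sinf(0.1f)); /* unit-magnitude step */
    volk_32fc_s32fc_x2_rotator_32fc(out, in, &phase_inc, &phase, N);

    printf("out[0] = %g%+gj\n", lv_creal(out[0]), lv_cimag(out[0]));
    volk_free(in);
    volk_free(out);
    return 0;
}

The visible consequence for users is that literals can no longer be passed directly as scalar arguments; they need an addressable variable first, which is exactly what the &scalar, &phase_inc, and &factor updates throughout the diff do.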