diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index f0581eea..0b79cb82 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -114,7 +114,6 @@ jobs:
submodules: 'recursive'
- uses: uraimo/run-on-arch-action@v2.5.0
name: Build in non-x86 container
- continue-on-error: ${{ contains(fromJson('["ppc64le", "s390x"]'), matrix.arch) }}
id: build
with:
arch: ${{ matrix.arch }}
@@ -153,7 +152,9 @@ jobs:
cmake -DCMAKE_CXX_FLAGS="-Werror" -DBUILD_EXECUTABLE=ON ..
echo "Build with $(nproc) thread(s)"
make -j$(nproc)
- ./cpu_features/list_cpu_features
+ if [ -f ./cpu_features/list_cpu_features ]; then
+ ./cpu_features/list_cpu_features
+ fi
./apps/volk-config-info --alignment
./apps/volk-config-info --avail-machines
./apps/volk-config-info --all-machines
diff --git a/kernels/volk/volk_32fc_s32fc_multiply_32fc.h b/kernels/volk/volk_32fc_s32fc_multiply_32fc.h
index 1593b7cb..5a29712f 100644
--- a/kernels/volk/volk_32fc_s32fc_multiply_32fc.h
+++ b/kernels/volk/volk_32fc_s32fc_multiply_32fc.h
@@ -18,11 +18,11 @@
* Dispatcher Prototype
* \code
* void volk_32fc_s32fc_multiply_32fc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const
- * lv_32fc_t scalar, unsigned int num_points); \endcode
+ * lv_32fc_t* scalar, unsigned int num_points); \endcode
*
* \b Inputs
* \li aVector: The input vector to be multiplied.
- * \li scalar The complex scalar to multiply against aVector.
+ * \li scalar: The complex scalar to multiply against aVector.
* \li num_points: The number of complex values in aVector.
*
* \b Outputs
@@ -46,7 +46,7 @@
* in[ii+N/2] = lv_cmake(-real, -imag);
* }
*
- * volk_32fc_s32fc_multiply_32fc(out, in, scalar, N);
+ * volk_32fc_s32fc_multiply_32fc(out, in, &scalar, N);
*
* printf(" mag phase | mag phase\n");
* for(unsigned int ii = 0; ii < N; ++ii){
@@ -73,7 +73,7 @@
static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
@@ -85,8 +85,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;
// Set up constant scalar vector
- yl = _mm256_set1_ps(lv_creal(scalar));
- yh = _mm256_set1_ps(lv_cimag(scalar));
+ yl = _mm256_set1_ps(lv_creal(*scalar));
+ yh = _mm256_set1_ps(lv_cimag(*scalar));
for (; number < quarterPoints; number++) {
x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
@@ -107,7 +107,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
}
for (i = num_points - isodd; i < num_points; i++) {
- *c++ = (*a++) * scalar;
+ *c++ = (*a++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX && LV_HAVE_FMA */
@@ -117,7 +117,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
@@ -129,8 +129,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;
// Set up constant scalar vector
- yl = _mm256_set1_ps(lv_creal(scalar));
- yh = _mm256_set1_ps(lv_cimag(scalar));
+ yl = _mm256_set1_ps(lv_creal(*scalar));
+ yh = _mm256_set1_ps(lv_cimag(*scalar));
for (; number < quarterPoints; number++) {
x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
@@ -151,7 +151,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
}
for (i = num_points - isodd; i < num_points; i++) {
- *c++ = (*a++) * scalar;
+ *c++ = (*a++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX */
@@ -161,7 +161,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
@@ -172,8 +172,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;
// Set up constant scalar vector
- yl = _mm_set_ps1(lv_creal(scalar));
- yh = _mm_set_ps1(lv_cimag(scalar));
+ yl = _mm_set_ps1(lv_creal(*scalar));
+ yh = _mm_set_ps1(lv_cimag(*scalar));
for (; number < halfPoints; number++) {
@@ -195,7 +195,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
}
if ((num_points % 2) != 0) {
- *c = (*a) * scalar;
+ *c = (*a) * (*scalar);
}
}
#endif /* LV_HAVE_SSE */
@@ -204,7 +204,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
lv_32fc_t* cPtr = cVector;
@@ -213,20 +213,20 @@ static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector,
// unwrap loop
while (number >= 8) {
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
- *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) * (*scalar);
number -= 8;
}
// clean up any remaining
while (number-- > 0)
- *cPtr++ = *aPtr++ * scalar;
+ *cPtr++ = *aPtr++ * (*scalar);
}
#endif /* LV_HAVE_GENERIC */
@@ -245,7 +245,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector,
static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
@@ -257,8 +257,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;
// Set up constant scalar vector
- yl = _mm256_set1_ps(lv_creal(scalar));
- yh = _mm256_set1_ps(lv_cimag(scalar));
+ yl = _mm256_set1_ps(lv_creal(*scalar));
+ yh = _mm256_set1_ps(lv_cimag(*scalar));
for (; number < quarterPoints; number++) {
x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
@@ -279,7 +279,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
}
for (i = num_points - isodd; i < num_points; i++) {
- *c++ = (*a++) * scalar;
+ *c++ = (*a++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX && LV_HAVE_FMA */
@@ -290,7 +290,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
@@ -302,8 +302,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;
// Set up constant scalar vector
- yl = _mm256_set1_ps(lv_creal(scalar));
- yh = _mm256_set1_ps(lv_cimag(scalar));
+ yl = _mm256_set1_ps(lv_creal(*scalar));
+ yh = _mm256_set1_ps(lv_cimag(*scalar));
for (; number < quarterPoints; number++) {
x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
@@ -324,7 +324,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
}
for (i = num_points - isodd; i < num_points; i++) {
- *c++ = (*a++) * scalar;
+ *c++ = (*a++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX */
@@ -334,7 +334,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
@@ -345,8 +345,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;
// Set up constant scalar vector
- yl = _mm_set_ps1(lv_creal(scalar));
- yh = _mm_set_ps1(lv_cimag(scalar));
+ yl = _mm_set_ps1(lv_creal(*scalar));
+ yh = _mm_set_ps1(lv_cimag(*scalar));
for (; number < halfPoints; number++) {
@@ -368,7 +368,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
}
if ((num_points % 2) != 0) {
- *c = (*a) * scalar;
+ *c = (*a) * (*scalar);
}
}
#endif /* LV_HAVE_SSE */
@@ -378,7 +378,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
lv_32fc_t* cPtr = cVector;
@@ -389,8 +389,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
float32x4x2_t a_val, scalar_val;
float32x4x2_t tmp_imag;
- scalar_val.val[0] = vld1q_dup_f32((const float*)&scalar);
- scalar_val.val[1] = vld1q_dup_f32(((const float*)&scalar) + 1);
+ scalar_val.val[0] = vld1q_dup_f32((const float*)scalar);
+ scalar_val.val[1] = vld1q_dup_f32(((const float*)scalar) + 1);
for (number = 0; number < quarter_points; ++number) {
a_val = vld2q_f32((float*)aPtr);
tmp_imag.val[1] = vmulq_f32(a_val.val[1], scalar_val.val[0]);
@@ -405,7 +405,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
}
for (number = quarter_points * 4; number < num_points; number++) {
- *cPtr++ = *aPtr++ * scalar;
+ *cPtr++ = *aPtr++ * (*scalar);
}
}
#endif /* LV_HAVE_NEON */
diff --git a/kernels/volk/volk_32fc_s32fc_rotatorpuppet_32fc.h b/kernels/volk/volk_32fc_s32fc_rotatorpuppet_32fc.h
index e328a311..d79761a7 100644
--- a/kernels/volk/volk_32fc_s32fc_rotatorpuppet_32fc.h
+++ b/kernels/volk/volk_32fc_s32fc_rotatorpuppet_32fc.h
@@ -21,15 +21,15 @@
static inline void volk_32fc_s32fc_rotatorpuppet_32fc_generic(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
unsigned int num_points)
{
lv_32fc_t phase[1] = { lv_cmake(.3f, 0.95393f) };
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
const lv_32fc_t phase_inc_n =
- phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+ *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
volk_32fc_s32fc_x2_rotator_32fc_generic(
- outVector, inVector, phase_inc_n, phase, num_points);
+ outVector, inVector, &phase_inc_n, phase, num_points);
}
#endif /* LV_HAVE_GENERIC */
@@ -41,15 +41,15 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_generic(lv_32fc_t* outVect
static inline void volk_32fc_s32fc_rotatorpuppet_32fc_neon(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
unsigned int num_points)
{
lv_32fc_t phase[1] = { lv_cmake(.3f, 0.95393f) };
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
const lv_32fc_t phase_inc_n =
- phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+ *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
volk_32fc_s32fc_x2_rotator_32fc_neon(
- outVector, inVector, phase_inc_n, phase, num_points);
+ outVector, inVector, &phase_inc_n, phase, num_points);
}
#endif /* LV_HAVE_NEON */
@@ -60,15 +60,15 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_neon(lv_32fc_t* outVector,
static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_sse4_1(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
unsigned int num_points)
{
lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) };
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
const lv_32fc_t phase_inc_n =
- phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+ *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(
- outVector, inVector, phase_inc_n, phase, num_points);
+ outVector, inVector, &phase_inc_n, phase, num_points);
}
#endif /* LV_HAVE_SSE4_1 */
@@ -78,15 +78,15 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_sse4_1(lv_32fc_t* outVec
#include <smmintrin.h>
static inline void volk_32fc_s32fc_rotatorpuppet_32fc_u_sse4_1(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
unsigned int num_points)
{
lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) };
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
const lv_32fc_t phase_inc_n =
- phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+ *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(
- outVector, inVector, phase_inc_n, phase, num_points);
+ outVector, inVector, &phase_inc_n, phase, num_points);
}
#endif /* LV_HAVE_SSE4_1 */
@@ -97,15 +97,15 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_u_sse4_1(lv_32fc_t* outVec
static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_avx(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
unsigned int num_points)
{
lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) };
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
const lv_32fc_t phase_inc_n =
- phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+ *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
volk_32fc_s32fc_x2_rotator_32fc_a_avx(
- outVector, inVector, phase_inc_n, phase, num_points);
+ outVector, inVector, &phase_inc_n, phase, num_points);
}
#endif /* LV_HAVE_AVX */
@@ -116,15 +116,15 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_avx(lv_32fc_t* outVector
static inline void volk_32fc_s32fc_rotatorpuppet_32fc_u_avx(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
unsigned int num_points)
{
lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) };
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
const lv_32fc_t phase_inc_n =
- phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+ *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
volk_32fc_s32fc_x2_rotator_32fc_u_avx(
- outVector, inVector, phase_inc_n, phase, num_points);
+ outVector, inVector, &phase_inc_n, phase, num_points);
}
#endif /* LV_HAVE_AVX */
@@ -132,17 +132,18 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_u_avx(lv_32fc_t* outVector
#if LV_HAVE_AVX && LV_HAVE_FMA
#include <immintrin.h>
-static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_avx_fma(lv_32fc_t* outVector,
- const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
- unsigned int num_points)
+static inline void
+volk_32fc_s32fc_rotatorpuppet_32fc_a_avx_fma(lv_32fc_t* outVector,
+ const lv_32fc_t* inVector,
+ const lv_32fc_t* phase_inc,
+ unsigned int num_points)
{
lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) };
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
const lv_32fc_t phase_inc_n =
- phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+ *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
volk_32fc_s32fc_x2_rotator_32fc_a_avx_fma(
- outVector, inVector, phase_inc_n, phase, num_points);
+ outVector, inVector, &phase_inc_n, phase, num_points);
}
#endif /* LV_HAVE_AVX && LV_HAVE_FMA*/
@@ -151,17 +152,18 @@ static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_avx_fma(lv_32fc_t* outVe
#if LV_HAVE_AVX && LV_HAVE_FMA
#include <immintrin.h>
-static inline void volk_32fc_s32fc_rotatorpuppet_32fc_u_avx_fma(lv_32fc_t* outVector,
- const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
- unsigned int num_points)
+static inline void
+volk_32fc_s32fc_rotatorpuppet_32fc_u_avx_fma(lv_32fc_t* outVector,
+ const lv_32fc_t* inVector,
+ const lv_32fc_t* phase_inc,
+ unsigned int num_points)
{
lv_32fc_t phase[1] = { lv_cmake(.3f, .95393f) };
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
const lv_32fc_t phase_inc_n =
- phase_inc / hypotf(lv_creal(phase_inc), lv_cimag(phase_inc));
+ *phase_inc / hypotf(lv_creal(*phase_inc), lv_cimag(*phase_inc));
volk_32fc_s32fc_x2_rotator_32fc_u_avx_fma(
- outVector, inVector, phase_inc_n, phase, num_points);
+ outVector, inVector, &phase_inc_n, phase, num_points);
}
#endif /* LV_HAVE_AVX && LV_HAVE_FMA*/
diff --git a/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h b/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h
index aebf46d5..e3e9be0b 100644
--- a/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h
+++ b/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h
@@ -18,7 +18,7 @@
* Dispatcher Prototype
* \code
* void volk_32fc_s32fc_x2_rotator_32fc(lv_32fc_t* outVector, const lv_32fc_t* inVector,
- * const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) \endcode
+ * const lv_32fc_t* phase_inc, lv_32fc_t* phase, unsigned int num_points) \endcode
*
* \b Inputs
* \li inVector: Vector to be rotated.
@@ -82,7 +82,7 @@
static inline void volk_32fc_s32fc_x2_rotator_32fc_generic(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
lv_32fc_t* phase,
unsigned int num_points)
{
@@ -91,14 +91,14 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_generic(lv_32fc_t* outVector,
for (i = 0; i < (unsigned int)(num_points / ROTATOR_RELOAD); ++i) {
for (j = 0; j < ROTATOR_RELOAD; ++j) {
*outVector++ = *inVector++ * (*phase);
- (*phase) *= phase_inc;
+ (*phase) *= *phase_inc;
}
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
}
for (i = 0; i < num_points % ROTATOR_RELOAD; ++i) {
*outVector++ = *inVector++ * (*phase);
- (*phase) *= phase_inc;
+ (*phase) *= *phase_inc;
}
if (i) {
// Make sure, we normalize phase on every call!
@@ -115,7 +115,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_generic(lv_32fc_t* outVector,
static inline void volk_32fc_s32fc_x2_rotator_32fc_neon(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
lv_32fc_t* phase,
unsigned int num_points)
@@ -132,7 +132,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_neon(lv_32fc_t* outVector,
for (i = 0; i < 4; ++i) {
phasePtr[i] *= incr;
- incr *= (phase_inc);
+ incr *= (*phase_inc);
}
// Notice that incr has be incremented in the previous loop
@@ -194,7 +194,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_neon(lv_32fc_t* outVector,
// Deal with the rest
for (i = 0; i < num_points % 4; i++) {
*outputVectorPtr++ = *inputVectorPtr++ * phasePtr[0];
- phasePtr[0] *= (phase_inc);
+ phasePtr[0] *= (*phase_inc);
}
// For continuous phase next time we need to call this function
@@ -209,7 +209,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_neon(lv_32fc_t* outVector,
static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
lv_32fc_t* phase,
unsigned int num_points)
{
@@ -222,7 +222,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector
for (i = 0; i < 2; ++i) {
phase_Ptr[i] *= incr;
- incr *= (phase_inc);
+ incr *= (*phase_inc);
}
__m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
@@ -298,7 +298,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector
_mm_storeu_ps((float*)phase_Ptr, phase_Val);
if (num_points & 1) {
*cPtr++ = *aPtr++ * phase_Ptr[0];
- phase_Ptr[0] *= (phase_inc);
+ phase_Ptr[0] *= (*phase_inc);
}
(*phase) = phase_Ptr[0];
@@ -312,7 +312,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector
static inline void volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
lv_32fc_t* phase,
unsigned int num_points)
{
@@ -325,7 +325,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(lv_32fc_t* outVector
for (i = 0; i < 2; ++i) {
phase_Ptr[i] *= incr;
- incr *= (phase_inc);
+ incr *= (*phase_inc);
}
/*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
@@ -404,7 +404,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(lv_32fc_t* outVector
_mm_storeu_ps((float*)phase_Ptr, phase_Val);
if (num_points & 1) {
*cPtr++ = *aPtr++ * phase_Ptr[0];
- phase_Ptr[0] *= (phase_inc);
+ phase_Ptr[0] *= (*phase_inc);
}
(*phase) = phase_Ptr[0];
@@ -419,7 +419,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(lv_32fc_t* outVector
static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
lv_32fc_t* phase,
unsigned int num_points)
{
@@ -432,7 +432,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector,
for (i = 0; i < 4; ++i) {
phase_Ptr[i] *= incr;
- incr *= (phase_inc);
+ incr *= (*phase_inc);
}
__m256 aVal, phase_Val, z;
@@ -493,7 +493,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector,
static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
lv_32fc_t* phase,
unsigned int num_points)
{
@@ -506,7 +506,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx(lv_32fc_t* outVector,
for (i = 0; i < 4; ++i) {
phase_Ptr[i] *= incr;
- incr *= (phase_inc);
+ incr *= (*phase_inc);
}
__m256 aVal, phase_Val, z;
@@ -565,7 +565,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx(lv_32fc_t* outVector,
static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx_fma(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
lv_32fc_t* phase,
unsigned int num_points)
{
@@ -579,7 +579,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx_fma(lv_32fc_t* outVecto
for (i = 0; i < 4; ++i) {
phase_Ptr[i] *= incr;
- incr *= (phase_inc);
+ incr *= (*phase_inc);
}
__m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
@@ -661,7 +661,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx_fma(lv_32fc_t* outVecto
_mm256_store_ps((float*)phase_Ptr, phase_Val);
for (i = 0; i < num_points % 4; ++i) {
*cPtr++ = *aPtr++ * phase_Ptr[0];
- phase_Ptr[0] *= (phase_inc);
+ phase_Ptr[0] *= (*phase_inc);
}
(*phase) = phase_Ptr[0];
@@ -674,7 +674,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx_fma(lv_32fc_t* outVecto
static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx_fma(lv_32fc_t* outVector,
const lv_32fc_t* inVector,
- const lv_32fc_t phase_inc,
+ const lv_32fc_t* phase_inc,
lv_32fc_t* phase,
unsigned int num_points)
{
@@ -687,7 +687,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx_fma(lv_32fc_t* outVecto
for (i = 0; i < 4; ++i) {
phase_Ptr[i] *= incr;
- incr *= (phase_inc);
+ incr *= (*phase_inc);
}
__m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, tmp2p;
@@ -769,7 +769,7 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx_fma(lv_32fc_t* outVecto
_mm256_storeu_ps((float*)phase_Ptr, phase_Val);
for (i = 0; i < num_points % 4; ++i) {
*cPtr++ = *aPtr++ * phase_Ptr[0];
- phase_Ptr[0] *= (phase_inc);
+ phase_Ptr[0] *= (*phase_inc);
}
(*phase) = phase_Ptr[0];
diff --git a/kernels/volk/volk_32fc_x2_s32fc_multiply_conjugate_add_32fc.h b/kernels/volk/volk_32fc_x2_s32fc_multiply_conjugate_add_32fc.h
index 85cdaf16..70a0b31f 100644
--- a/kernels/volk/volk_32fc_x2_s32fc_multiply_conjugate_add_32fc.h
+++ b/kernels/volk/volk_32fc_x2_s32fc_multiply_conjugate_add_32fc.h
@@ -15,12 +15,12 @@
* Conjugate the input complex vector, multiply them by a complex scalar,
* add the another input complex vector and returns the results.
*
- * c[i] = a[i] + conj(b[i]) * scalar
+ * c[i] = a[i] + conj(b[i]) * (*scalar)
*
* Dispatcher Prototype
* \code
* void volk_32fc_x2_s32fc_multiply_conjugate_add_32fc(lv_32fc_t* cVector, const
- * lv_32fc_t* aVector, const lv_32fc_t* bVector, const lv_32fc_t scalar, unsigned int
+ * lv_32fc_t* aVector, const lv_32fc_t* bVector, const lv_32fc_t* scalar, unsigned int
* num_points); \endcode
*
* \b Inputs
@@ -58,7 +58,7 @@
* // update weight using output.
* float real = lv_creal(output) * (1.0 - std::norm(output)) * MU;
* lv_32fc_t factor = lv_cmake(real, 0.f);
- * volk_32fc_x2_s32fc_multiply_conjugate_add_32fc(next, weight, state, factor, n_filter);
+ * volk_32fc_x2_s32fc_multiply_conjugate_add_32fc(next, weight, state, &factor, n_filter);
* lv_32fc_t *tmp = next;
* next = weight;
* weight = tmp;
@@ -85,7 +85,7 @@ static inline void
volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_generic(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
const lv_32fc_t* aPtr = aVector;
@@ -95,20 +95,20 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_generic(lv_32fc_t* cVector,
// unwrap loop
while (number >= 8) {
- *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
- *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
- *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
- *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
- *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
- *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
- *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
- *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
+ *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
+ *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
number -= 8;
}
// clean up any remaining
while (number-- > 0) {
- *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
+ *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
}
}
#endif /* LV_HAVE_GENERIC */
@@ -122,7 +122,7 @@ static inline void
volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_u_avx(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
@@ -131,7 +131,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_u_avx(lv_32fc_t* cVector,
unsigned int isodd = num_points & 3;
__m256 x, y, s, z;
- lv_32fc_t v_scalar[4] = { scalar, scalar, scalar, scalar };
+ lv_32fc_t v_scalar[4] = { *scalar, *scalar, *scalar, *scalar };
const lv_32fc_t* a = aVector;
const lv_32fc_t* b = bVector;
@@ -153,7 +153,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_u_avx(lv_32fc_t* cVector,
}
for (i = num_points - isodd; i < num_points; i++) {
- *c++ = (*a++) + lv_conj(*b++) * scalar;
+ *c++ = (*a++) + lv_conj(*b++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX */
@@ -167,14 +167,14 @@ static inline void
volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_u_sse3(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
const unsigned int halfPoints = num_points / 2;
__m128 x, y, s, z;
- lv_32fc_t v_scalar[2] = { scalar, scalar };
+ lv_32fc_t v_scalar[2] = { *scalar, *scalar };
const lv_32fc_t* a = aVector;
const lv_32fc_t* b = bVector;
@@ -196,7 +196,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_u_sse3(lv_32fc_t* cVector,
}
if ((num_points % 2) != 0) {
- *c = *a + lv_conj(*b) * scalar;
+ *c = *a + lv_conj(*b) * (*scalar);
}
}
#endif /* LV_HAVE_SSE */
@@ -210,7 +210,7 @@ static inline void
volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_a_avx(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
@@ -219,7 +219,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_a_avx(lv_32fc_t* cVector,
unsigned int isodd = num_points & 3;
__m256 x, y, s, z;
- lv_32fc_t v_scalar[4] = { scalar, scalar, scalar, scalar };
+ lv_32fc_t v_scalar[4] = { *scalar, *scalar, *scalar, *scalar };
const lv_32fc_t* a = aVector;
const lv_32fc_t* b = bVector;
@@ -241,7 +241,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_a_avx(lv_32fc_t* cVector,
}
for (i = num_points - isodd; i < num_points; i++) {
- *c++ = (*a++) + lv_conj(*b++) * scalar;
+ *c++ = (*a++) + lv_conj(*b++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX */
@@ -255,14 +255,14 @@ static inline void
volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_a_sse3(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
const unsigned int halfPoints = num_points / 2;
__m128 x, y, s, z;
- lv_32fc_t v_scalar[2] = { scalar, scalar };
+ lv_32fc_t v_scalar[2] = { *scalar, *scalar };
const lv_32fc_t* a = aVector;
const lv_32fc_t* b = bVector;
@@ -284,7 +284,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_a_sse3(lv_32fc_t* cVector,
}
if ((num_points % 2) != 0) {
- *c = *a + lv_conj(*b) * scalar;
+ *c = *a + lv_conj(*b) * (*scalar);
}
}
#endif /* LV_HAVE_SSE */
@@ -297,7 +297,7 @@ static inline void
volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_neon(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
- const lv_32fc_t scalar,
+ const lv_32fc_t* scalar,
unsigned int num_points)
{
const lv_32fc_t* bPtr = bVector;
@@ -309,8 +309,8 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_neon(lv_32fc_t* cVector,
float32x4x2_t a_val, b_val, c_val, scalar_val;
float32x4x2_t tmp_val;
- scalar_val.val[0] = vld1q_dup_f32((const float*)&scalar);
- scalar_val.val[1] = vld1q_dup_f32(((const float*)&scalar) + 1);
+ scalar_val.val[0] = vld1q_dup_f32((const float*)scalar);
+ scalar_val.val[1] = vld1q_dup_f32(((const float*)scalar) + 1);
for (number = 0; number < quarter_points; ++number) {
a_val = vld2q_f32((float*)aPtr);
@@ -336,7 +336,7 @@ volk_32fc_x2_s32fc_multiply_conjugate_add_32fc_neon(lv_32fc_t* cVector,
}
for (number = quarter_points * 4; number < num_points; number++) {
- *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * scalar;
+ *cPtr++ = (*aPtr++) + lv_conj(*bPtr++) * (*scalar);
}
}
#endif /* LV_HAVE_NEON */
diff --git a/lib/qa_utils.cc b/lib/qa_utils.cc
index 4be7b8ad..a94d895c 100644
--- a/lib/qa_utils.cc
+++ b/lib/qa_utils.cc
@@ -355,7 +355,7 @@ inline void run_cast_test1_s32fc(volk_fn_1arg_s32fc func,
std::string arch)
{
while (iter--)
- func(buffs[0], scalar, vlen, arch.c_str());
+ func(buffs[0], &scalar, vlen, arch.c_str());
}
inline void run_cast_test2_s32fc(volk_fn_2arg_s32fc func,
@@ -366,7 +366,7 @@ inline void run_cast_test2_s32fc(volk_fn_2arg_s32fc func,
std::string arch)
{
while (iter--)
- func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
+ func(buffs[0], buffs[1], &scalar, vlen, arch.c_str());
}
inline void run_cast_test3_s32fc(volk_fn_3arg_s32fc func,
@@ -377,7 +377,7 @@ inline void run_cast_test3_s32fc(volk_fn_3arg_s32fc func,
std::string arch)
{
while (iter--)
- func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
+ func(buffs[0], buffs[1], buffs[2], &scalar, vlen, arch.c_str());
}
template
diff --git a/lib/qa_utils.h b/lib/qa_utils.h
index 40f549cf..a6567720 100644
--- a/lib/qa_utils.h
+++ b/lib/qa_utils.h
@@ -199,11 +199,11 @@ typedef void (*volk_fn_2arg_s32f)(void*, void*, float, unsigned int, const char*
typedef void (*volk_fn_3arg_s32f)(void*, void*, void*, float, unsigned int, const char*);
typedef void (*volk_fn_1arg_s32fc)(
void*,
- lv_32fc_t,
+ lv_32fc_t*,
unsigned int,
const char*); // one input vector, one scalar float input
-typedef void (*volk_fn_2arg_s32fc)(void*, void*, lv_32fc_t, unsigned int, const char*);
+typedef void (*volk_fn_2arg_s32fc)(void*, void*, lv_32fc_t*, unsigned int, const char*);
typedef void (*volk_fn_3arg_s32fc)(
- void*, void*, void*, lv_32fc_t, unsigned int, const char*);
+ void*, void*, void*, lv_32fc_t*, unsigned int, const char*);
#endif // VOLK_QA_UTILS_H