Skip to content

Commit

Permalink
Use pointers to pass in s32fc arguments
Browse files Browse the repository at this point in the history
This avoids undefined behaviour arising from incompatibility between
complex numbers in C and C++.

Signed-off-by: Clayton Smith <argilo@gmail.com>
  • Loading branch information
argilo committed Nov 7, 2023
1 parent 56a893c commit 7e20f80
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 131 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ jobs:
submodules: 'recursive'
- uses: uraimo/run-on-arch-action@v2.5.0
name: Build in non-x86 container
continue-on-error: ${{ contains(fromJson('["ppc64le", "s390x"]'), matrix.arch) }}
id: build
with:
arch: ${{ matrix.arch }}
Expand Down Expand Up @@ -153,7 +152,9 @@ jobs:
cmake -DCMAKE_CXX_FLAGS="-Werror" -DBUILD_EXECUTABLE=ON ..
echo "Build with $(nproc) thread(s)"
make -j$(nproc)
./cpu_features/list_cpu_features
if [ -f ./cpu_features/list_cpu_features ]; then
./cpu_features/list_cpu_features
fi
./apps/volk-config-info --alignment
./apps/volk-config-info --avail-machines
./apps/volk-config-info --all-machines
Expand Down
82 changes: 41 additions & 41 deletions kernels/volk/volk_32fc_s32fc_multiply_32fc.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
* <b>Dispatcher Prototype</b>
* \code
* void volk_32fc_s32fc_multiply_32fc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const
* lv_32fc_t* scalar, unsigned int num_points); \endcode
*
* \b Inputs
* \li aVector: The input vector to be multiplied.
* \li scalar: The complex scalar to multiply against aVector.
* \li num_points: The number of complex values in aVector.
*
* \b Outputs
Expand All @@ -46,7 +46,7 @@
* in[ii+N/2] = lv_cmake(-real, -imag);
* }
*
* volk_32fc_s32fc_multiply_32fc(out, in, &scalar, N);
*
* printf(" mag phase | mag phase\n");
* for(unsigned int ii = 0; ii < N; ++ii){
Expand All @@ -73,7 +73,7 @@

static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
Expand All @@ -85,8 +85,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;

// Set up constant scalar vector
yl = _mm256_set1_ps(lv_creal(scalar));
yh = _mm256_set1_ps(lv_cimag(scalar));
yl = _mm256_set1_ps(lv_creal(*scalar));
yh = _mm256_set1_ps(lv_cimag(*scalar));

for (; number < quarterPoints; number++) {
x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
Expand All @@ -107,7 +107,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,
}

for (i = num_points - isodd; i < num_points; i++) {
*c++ = (*a++) * scalar;
*c++ = (*a++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX && LV_HAVE_FMA */
Expand All @@ -117,7 +117,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx_fma(lv_32fc_t* cVector,

static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
Expand All @@ -129,8 +129,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;

// Set up constant scalar vector
yl = _mm256_set1_ps(lv_creal(scalar));
yh = _mm256_set1_ps(lv_cimag(scalar));
yl = _mm256_set1_ps(lv_creal(*scalar));
yh = _mm256_set1_ps(lv_cimag(*scalar));

for (; number < quarterPoints; number++) {
x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
Expand All @@ -151,7 +151,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,
}

for (i = num_points - isodd; i < num_points; i++) {
*c++ = (*a++) * scalar;
*c++ = (*a++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX */
Expand All @@ -161,7 +161,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector,

static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
Expand All @@ -172,8 +172,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;

// Set up constant scalar vector
yl = _mm_set_ps1(lv_creal(scalar));
yh = _mm_set_ps1(lv_cimag(scalar));
yl = _mm_set_ps1(lv_creal(*scalar));
yh = _mm_set_ps1(lv_cimag(*scalar));

for (; number < halfPoints; number++) {

Expand All @@ -195,7 +195,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,
}

if ((num_points % 2) != 0) {
*c = (*a) * scalar;
*c = (*a) * (*scalar);
}
}
#endif /* LV_HAVE_SSE */
Expand All @@ -204,7 +204,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector,

/*
 * Generic (portable) implementation: multiply each complex element of
 * aVector by the complex scalar, writing results to cVector.
 *
 * The scalar is passed by pointer (const lv_32fc_t*) rather than by value:
 * this avoids undefined behaviour arising from the ABI incompatibility
 * between C `_Complex float` and C++ `std::complex<float>` when kernels are
 * compiled in one language and called from the other.
 *
 * \param cVector    Output buffer; receives num_points complex products.
 * \param aVector    Input buffer of num_points complex values.
 * \param scalar     Pointer to the complex scalar multiplier.
 * \param num_points Number of complex values to process.
 */
static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector,
                                                         const lv_32fc_t* aVector,
                                                         const lv_32fc_t* scalar,
                                                         unsigned int num_points)
{
    lv_32fc_t* cPtr = cVector;
    const lv_32fc_t* aPtr = aVector;
    unsigned int number = num_points;

    // Manually unrolled by 8 to reduce loop overhead in this scalar fallback.
    while (number >= 8) {
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        *cPtr++ = (*aPtr++) * (*scalar);
        number -= 8;
    }

    // Handle the remaining 0-7 tail elements.
    while (number-- > 0)
        *cPtr++ = *aPtr++ * (*scalar);
}
#endif /* LV_HAVE_GENERIC */

Expand All @@ -245,7 +245,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector,

static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
Expand All @@ -257,8 +257,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;

// Set up constant scalar vector
yl = _mm256_set1_ps(lv_creal(scalar));
yh = _mm256_set1_ps(lv_cimag(scalar));
yl = _mm256_set1_ps(lv_creal(*scalar));
yh = _mm256_set1_ps(lv_cimag(*scalar));

for (; number < quarterPoints; number++) {
x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
Expand All @@ -279,7 +279,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,
}

for (i = num_points - isodd; i < num_points; i++) {
*c++ = (*a++) * scalar;
*c++ = (*a++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX && LV_HAVE_FMA */
Expand All @@ -290,7 +290,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx_fma(lv_32fc_t* cVector,

static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
Expand All @@ -302,8 +302,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;

// Set up constant scalar vector
yl = _mm256_set1_ps(lv_creal(scalar));
yh = _mm256_set1_ps(lv_cimag(scalar));
yl = _mm256_set1_ps(lv_creal(*scalar));
yh = _mm256_set1_ps(lv_cimag(*scalar));

for (; number < quarterPoints; number++) {
x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
Expand All @@ -324,7 +324,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,
}

for (i = num_points - isodd; i < num_points; i++) {
*c++ = (*a++) * scalar;
*c++ = (*a++) * (*scalar);
}
}
#endif /* LV_HAVE_AVX */
Expand All @@ -334,7 +334,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector,

static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
unsigned int number = 0;
Expand All @@ -345,8 +345,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
const lv_32fc_t* a = aVector;

// Set up constant scalar vector
yl = _mm_set_ps1(lv_creal(scalar));
yh = _mm_set_ps1(lv_cimag(scalar));
yl = _mm_set_ps1(lv_creal(*scalar));
yh = _mm_set_ps1(lv_cimag(*scalar));

for (; number < halfPoints; number++) {

Expand All @@ -368,7 +368,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,
}

if ((num_points % 2) != 0) {
*c = (*a) * scalar;
*c = (*a) * (*scalar);
}
}
#endif /* LV_HAVE_SSE */
Expand All @@ -378,7 +378,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector,

static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t scalar,
const lv_32fc_t* scalar,
unsigned int num_points)
{
lv_32fc_t* cPtr = cVector;
Expand All @@ -389,8 +389,8 @@ static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
float32x4x2_t a_val, scalar_val;
float32x4x2_t tmp_imag;

scalar_val.val[0] = vld1q_dup_f32((const float*)&scalar);
scalar_val.val[1] = vld1q_dup_f32(((const float*)&scalar) + 1);
scalar_val.val[0] = vld1q_dup_f32((const float*)scalar);
scalar_val.val[1] = vld1q_dup_f32(((const float*)scalar) + 1);
for (number = 0; number < quarter_points; ++number) {
a_val = vld2q_f32((float*)aPtr);
tmp_imag.val[1] = vmulq_f32(a_val.val[1], scalar_val.val[0]);
Expand All @@ -405,7 +405,7 @@ static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector,
}

for (number = quarter_points * 4; number < num_points; number++) {
*cPtr++ = *aPtr++ * scalar;
*cPtr++ = *aPtr++ * (*scalar);
}
}
#endif /* LV_HAVE_NEON */
Expand Down
Loading

0 comments on commit 7e20f80

Please sign in to comment.