diff --git a/src/layer/x86/gridsample_bilinear_apply_interpolation.h b/src/layer/x86/gridsample_bilinear_apply_interpolation.h index 7a78338c988..23ae4253dea 100644 --- a/src/layer/x86/gridsample_bilinear_apply_interpolation.h +++ b/src/layer/x86/gridsample_bilinear_apply_interpolation.h @@ -100,7 +100,6 @@ static void gridsample_3d_bilinear_apply_interpolation_p16(const Mat& src, Mat& in_bound = offset_ptr[7] >= 0 ? 0xFFFF : 0; __m512 v111_val = _mm512_maskz_load_ps(in_bound, srcptr + offset_ptr[7]); - __m512 value = _mm512_set1_ps(value_ptr[0]); __m512 v00 = _mm512_fmadd_ps(v001_val, value, _mm512_fnmadd_ps(v000_val, value, v000_val)); __m512 v01 = _mm512_fmadd_ps(v011_val, value, _mm512_fnmadd_ps(v010_val, value, v010_val)); @@ -285,7 +284,7 @@ static void gridsample_3d_bilinear_apply_interpolation_p4(const Mat& src, Mat& d { const int* offset_ptr = (int*)offset_value_ptr; const float* value_ptr = offset_value_ptr + 8; - + __m128 v000_val = offset_ptr[0] >= 0 ? _mm_load_ps(srcptr + offset_ptr[0]) : _mm_set1_ps(0); __m128 v001_val = offset_ptr[1] >= 0 ? _mm_load_ps(srcptr + offset_ptr[1]) : _mm_set1_ps(0); __m128 v010_val = offset_ptr[2] >= 0 ? _mm_load_ps(srcptr + offset_ptr[2]) : _mm_set1_ps(0); diff --git a/src/layer/x86/gridsample_bilinear_compute_blob.h b/src/layer/x86/gridsample_bilinear_compute_blob.h index bf2d80f4ec1..caf9208bdde 100644 --- a/src/layer/x86/gridsample_bilinear_compute_blob.h +++ b/src/layer/x86/gridsample_bilinear_compute_blob.h @@ -79,7 +79,7 @@ void gridsample_2d_bilinear_compute_blob(const Mat& src, const Mat& grid, Mat& o transpose8x6_ps(nw_offset, ne_offset, sw_offset, se_offset, alpha, beta); _mm256_storeu_ps(offset_value_ptr, nw_offset); - _mm256_storeu_ps(offset_value_ptr + 8, ne_offset); + _mm256_storeu_ps(offset_value_ptr + 8, ne_offset); _mm256_storeu_ps(offset_value_ptr + 16, sw_offset); _mm256_storeu_ps(offset_value_ptr + 24, se_offset); @@ -339,7 +339,7 @@ void gridsample_3d_bilinear_compute_blob(const Mat& src, const Mat& grid, Mat& o tne_offset = _mm256_castsi256_ps(_mm256_cvtps_epi32(tne_offset)); tsw_offset = _mm256_castsi256_ps(_mm256_cvtps_epi32(tsw_offset)); tse_offset = _mm256_castsi256_ps(_mm256_cvtps_epi32(tse_offset)); - + bnw_offset = _mm256_castsi256_ps(_mm256_cvtps_epi32(bnw_offset)); bne_offset = _mm256_castsi256_ps(_mm256_cvtps_epi32(bne_offset)); bsw_offset = _mm256_castsi256_ps(_mm256_cvtps_epi32(bsw_offset)); @@ -422,7 +422,7 @@ void gridsample_3d_bilinear_compute_blob(const Mat& src, const Mat& grid, Mat& o offset_ptr[1] = in_bound_001 ? (x1 + y0 * src.w + z0 * src.w * src.h) * src.elempack : -1.0; offset_ptr[2] = in_bound_010 ? (x0 + y1 * src.w + z0 * src.w * src.h) * src.elempack : -1.0; offset_ptr[3] = in_bound_011 ? (x1 + y1 * src.w + z0 * src.w * src.h) * src.elempack : -1.0; - + offset_ptr[4] = in_bound_100 ? (x0 + y0 * src.w + z1 * src.w * src.h) * src.elempack : -1.0; offset_ptr[5] = in_bound_101 ? (x1 + y0 * src.w + z1 * src.w * src.h) * src.elempack : -1.0; offset_ptr[6] = in_bound_110 ? (x0 + y1 * src.w + z1 * src.w * src.h) * src.elempack : -1.0; diff --git a/src/layer/x86/gridsample_x86.cpp b/src/layer/x86/gridsample_x86.cpp index 312c5e17d25..004bc4d0895 100644 --- a/src/layer/x86/gridsample_x86.cpp +++ b/src/layer/x86/gridsample_x86.cpp @@ -55,7 +55,7 @@ int GridSample_x86::forward(const std::vector& bottom_blobs, std::vector