Skip to content

Commit

Permalink
fix mingw64 avx crash and termux build issue (#5464)
Browse files Browse the repository at this point in the history
* Remove two potential warnings for VisualStudio

* fix mingw64 avx crash

* fix build issue in termux
  • Loading branch information
TianZerL authored Jun 5, 2024
1 parent bbf2e5d commit b0de947
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 133 deletions.
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ if(NCNN_VULKAN)
target_link_libraries(ncnn PRIVATE glslang SPIRV)
endif()

- if(NCNN_PLATFORM_API AND ANDROID_NDK)
+ if(NCNN_PLATFORM_API AND ANDROID)
target_link_libraries(ncnn PUBLIC android jnigraphics log)
endif()

Expand Down
2 changes: 1 addition & 1 deletion src/layer/x86/avx_mathfun.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ _PS256_CONST(cephes_tanh_p8, 1.18534705686654e-04f);
_PS256_CONST(cephes_tanh_p9, 2.26843463243900e-03f);

// an approximation of tanh
- static inline __m256 tanh256_ps(const __m256 x)
+ static NCNN_FORCEINLINE __m256 tanh256_ps(__m256 x)
{
__m256 value = x;
value = _mm256_max_ps(*(__m256*)_ps256_tanh_lo, value);
Expand Down
96 changes: 48 additions & 48 deletions src/layer/x86/binaryop_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,22 +479,22 @@ namespace BinaryOp_x86_functor {

struct binary_op_add
{
- float func(const float& x, const float& y) const
+ NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return x + y;
}
#if __SSE2__
- __m128 func_pack4(const __m128& x, const __m128& y) const
+ NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_add_ps(x, y);
}
#if __AVX__
- __m256 func_pack8(const __m256& x, const __m256& y) const
+ NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_add_ps(x, y);
}
#if __AVX512F__
- __m512 func_pack16(const __m512& x, const __m512& y) const
+ NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_add_ps(x, y);
}
Expand All @@ -505,22 +505,22 @@ struct binary_op_add

struct binary_op_sub
{
- float func(const float& x, const float& y) const
+ NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return x - y;
}
#if __SSE2__
- __m128 func_pack4(const __m128& x, const __m128& y) const
+ NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_sub_ps(x, y);
}
#if __AVX__
- __m256 func_pack8(const __m256& x, const __m256& y) const
+ NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_sub_ps(x, y);
}
#if __AVX512F__
- __m512 func_pack16(const __m512& x, const __m512& y) const
+ NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_sub_ps(x, y);
}
Expand All @@ -531,22 +531,22 @@ struct binary_op_sub

struct binary_op_mul
{
- float func(const float& x, const float& y) const
+ NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return x * y;
}
#if __SSE2__
- __m128 func_pack4(const __m128& x, const __m128& y) const
+ NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_mul_ps(x, y);
}
#if __AVX__
- __m256 func_pack8(const __m256& x, const __m256& y) const
+ NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_mul_ps(x, y);
}
#if __AVX512F__
- __m512 func_pack16(const __m512& x, const __m512& y) const
+ NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_mul_ps(x, y);
}
Expand All @@ -557,22 +557,22 @@ struct binary_op_mul

struct binary_op_div
{
- float func(const float& x, const float& y) const
+ NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return x / y;
}
#if __SSE2__
- __m128 func_pack4(const __m128& x, const __m128& y) const
+ NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_div_ps(x, y);
}
#if __AVX__
- __m256 func_pack8(const __m256& x, const __m256& y) const
+ NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_div_ps(x, y);
}
#if __AVX512F__
- __m512 func_pack16(const __m512& x, const __m512& y) const
+ NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_div_ps(x, y);
}
Expand All @@ -583,22 +583,22 @@ struct binary_op_div

struct binary_op_max
{
- float func(const float& x, const float& y) const
+ NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return std::max(x, y);
}
#if __SSE2__
- __m128 func_pack4(const __m128& x, const __m128& y) const
+ NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_max_ps(x, y);
}
#if __AVX__
- __m256 func_pack8(const __m256& x, const __m256& y) const
+ NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_max_ps(x, y);
}
#if __AVX512F__
- __m512 func_pack16(const __m512& x, const __m512& y) const
+ NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_max_ps(x, y);
}
Expand All @@ -609,22 +609,22 @@ struct binary_op_max

struct binary_op_min
{
- float func(const float& x, const float& y) const
+ NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return std::min(x, y);
}
#if __SSE2__
- __m128 func_pack4(const __m128& x, const __m128& y) const
+ NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_min_ps(x, y);
}
#if __AVX__
- __m256 func_pack8(const __m256& x, const __m256& y) const
+ NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_min_ps(x, y);
}
#if __AVX512F__
- __m512 func_pack16(const __m512& x, const __m512& y) const
+ NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_min_ps(x, y);
}
Expand All @@ -635,22 +635,22 @@ struct binary_op_min

struct binary_op_pow
{
- float func(const float& x, const float& y) const
+ NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return (float)powf(x, y);
}
#if __SSE2__
- __m128 func_pack4(const __m128& x, const __m128& y) const
+ NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return pow_ps(x, y);
}
#if __AVX__
- __m256 func_pack8(const __m256& x, const __m256& y) const
+ NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return pow256_ps(x, y);
}
#if __AVX512F__
- __m512 func_pack16(const __m512& x, const __m512& y) const
+ NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return pow512_ps(x, y);
}
Expand All @@ -661,22 +661,22 @@ struct binary_op_pow

struct binary_op_rsub
{
- float func(const float& x, const float& y) const
+ NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return y - x;
}
#if __SSE2__
- __m128 func_pack4(const __m128& x, const __m128& y) const
+ NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_sub_ps(y, x);
}
#if __AVX__
- __m256 func_pack8(const __m256& x, const __m256& y) const
+ NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_sub_ps(y, x);
}
#if __AVX512F__
- __m512 func_pack16(const __m512& x, const __m512& y) const
+ NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_sub_ps(y, x);
}
Expand All @@ -687,22 +687,22 @@ struct binary_op_rsub

struct binary_op_rdiv
{
- float func(const float& x, const float& y) const
+ NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return y / x;
}
#if __SSE2__
- __m128 func_pack4(const __m128& x, const __m128& y) const
+ NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return _mm_div_ps(y, x);
}
#if __AVX__
- __m256 func_pack8(const __m256& x, const __m256& y) const
+ NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return _mm256_div_ps(y, x);
}
#if __AVX512F__
- __m512 func_pack16(const __m512& x, const __m512& y) const
+ NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return _mm512_div_ps(y, x);
}
Expand All @@ -713,22 +713,22 @@ struct binary_op_rdiv

struct binary_op_rpow
{
- float func(const float& x, const float& y) const
+ NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return (float)powf(y, x);
}
#if __SSE2__
- __m128 func_pack4(const __m128& x, const __m128& y) const
+ NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return pow_ps(y, x);
}
#if __AVX__
- __m256 func_pack8(const __m256& x, const __m256& y) const
+ NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return pow256_ps(y, x);
}
#if __AVX512F__
- __m512 func_pack16(const __m512& x, const __m512& y) const
+ NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return pow512_ps(y, x);
}
Expand All @@ -739,22 +739,22 @@ struct binary_op_rpow

struct binary_op_atan2
{
- float func(const float& x, const float& y) const
+ NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return (float)atan2f(x, y);
}
#if __SSE2__
- __m128 func_pack4(const __m128& x, const __m128& y) const
+ NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return atan2_ps(x, y);
}
#if __AVX__
- __m256 func_pack8(const __m256& x, const __m256& y) const
+ NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return atan2256_ps(x, y);
}
#if __AVX512F__
- __m512 func_pack16(const __m512& x, const __m512& y) const
+ NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return atan2512_ps(x, y);
}
Expand All @@ -765,22 +765,22 @@ struct binary_op_atan2

struct binary_op_ratan2
{
- float func(const float& x, const float& y) const
+ NCNN_FORCEINLINE float func(const float& x, const float& y) const
{
return (float)atan2f(y, x);
}
#if __SSE2__
- __m128 func_pack4(const __m128& x, const __m128& y) const
+ NCNN_FORCEINLINE __m128 func_pack4(const __m128& x, const __m128& y) const
{
return atan2_ps(y, x);
}
#if __AVX__
- __m256 func_pack8(const __m256& x, const __m256& y) const
+ NCNN_FORCEINLINE __m256 func_pack8(const __m256& x, const __m256& y) const
{
return atan2256_ps(y, x);
}
#if __AVX512F__
- __m512 func_pack16(const __m512& x, const __m512& y) const
+ NCNN_FORCEINLINE __m512 func_pack16(const __m512& x, const __m512& y) const
{
return atan2512_ps(y, x);
}
Expand Down
2 changes: 1 addition & 1 deletion src/layer/x86/padding_pack16.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

- static void padding_constant_pack16_avx512(const Mat& src, Mat& dst, int top, int bottom, int left, int right, __m512 v)
+ static void padding_constant_pack16_avx512(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const __m512& v)
{
const float* ptr = src;
float* outptr = dst;
Expand Down
2 changes: 1 addition & 1 deletion src/layer/x86/padding_pack4.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

- static void padding_constant_pack4_sse(const Mat& src, Mat& dst, int top, int bottom, int left, int right, __m128 v)
+ static void padding_constant_pack4_sse(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const __m128& v)
{
const float* ptr = src;
float* outptr = dst;
Expand Down
2 changes: 1 addition & 1 deletion src/layer/x86/padding_pack8.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

- static void padding_constant_pack8_avx(const Mat& src, Mat& dst, int top, int bottom, int left, int right, __m256 v)
+ static void padding_constant_pack8_avx(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const __m256& v)
{
const float* ptr = src;
float* outptr = dst;
Expand Down
Loading

0 comments on commit b0de947

Please sign in to comment.