Skip to content

Commit

Permalink
clean
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui committed Aug 29, 2023
1 parent 5ecf7aa commit b575029
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 82 deletions.
72 changes: 0 additions & 72 deletions src/layer/x86/x86_usability.h
Original file line number Diff line number Diff line change
Expand Up @@ -1081,78 +1081,6 @@ static NCNN_FORCEINLINE void transpose8x16_epi16(__m128i& _r0, __m128i& _r1, __m
_rf = _mm_unpackhi_epi64(_tmpr, _tmpv);
}

// Transpose, in place, the 16x16 matrix of 32-bit elements whose rows are the
// registers _r0 .. _rf.
//
// On return, register _rN holds element N of every original register, ordered
// by original register index (element from _r0 first, element from _rf last).
// All inputs are read before any output is written.
//
// The integer registers are reinterpreted as float vectors only so the
// ps / f32x4 shuffle intrinsics can be used; the elements are moved, not
// converted.
static NCNN_FORCEINLINE void transpose16x16_epi32(__m512i& _r0, __m512i& _r1, __m512i& _r2, __m512i& _r3, __m512i& _r4, __m512i& _r5, __m512i& _r6, __m512i& _r7,
                                                  __m512i& _r8, __m512i& _r9, __m512i& _ra, __m512i& _rb, __m512i& _rc, __m512i& _rd, __m512i& _re, __m512i& _rf)
{
    // Shuffle immediates, named by what they select.
    enum
    {
        // _mm512_shuffle_ps: low / high element pair of each 128-bit lane
        PICK_LO_PAIRS = _MM_SHUFFLE(1, 0, 1, 0),
        PICK_HI_PAIRS = _MM_SHUFFLE(3, 2, 3, 2),
        // _mm512_shuffle_f32x4: 128-bit lanes 0,2 / 1,3 of each operand
        PICK_EVEN_LANES = _MM_SHUFFLE(2, 0, 2, 0),
        PICK_ODD_LANES = _MM_SHUFFLE(3, 1, 3, 1)
    };

    // View the sixteen rows as float vectors.
    const __m512 _f0 = _mm512_castsi512_ps(_r0);
    const __m512 _f1 = _mm512_castsi512_ps(_r1);
    const __m512 _f2 = _mm512_castsi512_ps(_r2);
    const __m512 _f3 = _mm512_castsi512_ps(_r3);
    const __m512 _f4 = _mm512_castsi512_ps(_r4);
    const __m512 _f5 = _mm512_castsi512_ps(_r5);
    const __m512 _f6 = _mm512_castsi512_ps(_r6);
    const __m512 _f7 = _mm512_castsi512_ps(_r7);
    const __m512 _f8 = _mm512_castsi512_ps(_r8);
    const __m512 _f9 = _mm512_castsi512_ps(_r9);
    const __m512 _fa = _mm512_castsi512_ps(_ra);
    const __m512 _fb = _mm512_castsi512_ps(_rb);
    const __m512 _fc = _mm512_castsi512_ps(_rc);
    const __m512 _fd = _mm512_castsi512_ps(_rd);
    const __m512 _fe = _mm512_castsi512_ps(_re);
    const __m512 _ff = _mm512_castsi512_ps(_rf);

    // Stage 1: interleave each pair of adjacent rows inside every 128-bit lane.
    const __m512 _lo01 = _mm512_unpacklo_ps(_f0, _f1);
    const __m512 _hi01 = _mm512_unpackhi_ps(_f0, _f1);
    const __m512 _lo23 = _mm512_unpacklo_ps(_f2, _f3);
    const __m512 _hi23 = _mm512_unpackhi_ps(_f2, _f3);
    const __m512 _lo45 = _mm512_unpacklo_ps(_f4, _f5);
    const __m512 _hi45 = _mm512_unpackhi_ps(_f4, _f5);
    const __m512 _lo67 = _mm512_unpacklo_ps(_f6, _f7);
    const __m512 _hi67 = _mm512_unpackhi_ps(_f6, _f7);
    const __m512 _lo89 = _mm512_unpacklo_ps(_f8, _f9);
    const __m512 _hi89 = _mm512_unpackhi_ps(_f8, _f9);
    const __m512 _loab = _mm512_unpacklo_ps(_fa, _fb);
    const __m512 _hiab = _mm512_unpackhi_ps(_fa, _fb);
    const __m512 _locd = _mm512_unpacklo_ps(_fc, _fd);
    const __m512 _hicd = _mm512_unpackhi_ps(_fc, _fd);
    const __m512 _loef = _mm512_unpacklo_ps(_fe, _ff);
    const __m512 _hief = _mm512_unpackhi_ps(_fe, _ff);

    // Stage 2: combine two row pairs into groups of four rows.
    // Lane k of _aJ holds element 4k+J of rows 0..3; _b, _c, _d hold the same
    // for rows 4..7, 8..b and c..f.
    const __m512 _a0 = _mm512_shuffle_ps(_lo01, _lo23, PICK_LO_PAIRS);
    const __m512 _a1 = _mm512_shuffle_ps(_lo01, _lo23, PICK_HI_PAIRS);
    const __m512 _a2 = _mm512_shuffle_ps(_hi01, _hi23, PICK_LO_PAIRS);
    const __m512 _a3 = _mm512_shuffle_ps(_hi01, _hi23, PICK_HI_PAIRS);

    const __m512 _b0 = _mm512_shuffle_ps(_lo45, _lo67, PICK_LO_PAIRS);
    const __m512 _b1 = _mm512_shuffle_ps(_lo45, _lo67, PICK_HI_PAIRS);
    const __m512 _b2 = _mm512_shuffle_ps(_hi45, _hi67, PICK_LO_PAIRS);
    const __m512 _b3 = _mm512_shuffle_ps(_hi45, _hi67, PICK_HI_PAIRS);

    const __m512 _c0 = _mm512_shuffle_ps(_lo89, _loab, PICK_LO_PAIRS);
    const __m512 _c1 = _mm512_shuffle_ps(_lo89, _loab, PICK_HI_PAIRS);
    const __m512 _c2 = _mm512_shuffle_ps(_hi89, _hiab, PICK_LO_PAIRS);
    const __m512 _c3 = _mm512_shuffle_ps(_hi89, _hiab, PICK_HI_PAIRS);

    const __m512 _d0 = _mm512_shuffle_ps(_locd, _loef, PICK_LO_PAIRS);
    const __m512 _d1 = _mm512_shuffle_ps(_locd, _loef, PICK_HI_PAIRS);
    const __m512 _d2 = _mm512_shuffle_ps(_hicd, _hief, PICK_LO_PAIRS);
    const __m512 _d3 = _mm512_shuffle_ps(_hicd, _hief, PICK_HI_PAIRS);

    // Stage 3: gather whole 128-bit lanes. "t" merges rows 0..7 (_a with _b),
    // "b" merges rows 8..f (_c with _d); "e" keeps lanes 0 and 2 of each
    // operand, "o" keeps lanes 1 and 3.
    const __m512 _te0 = _mm512_shuffle_f32x4(_a0, _b0, PICK_EVEN_LANES);
    const __m512 _be0 = _mm512_shuffle_f32x4(_c0, _d0, PICK_EVEN_LANES);
    const __m512 _te1 = _mm512_shuffle_f32x4(_a1, _b1, PICK_EVEN_LANES);
    const __m512 _be1 = _mm512_shuffle_f32x4(_c1, _d1, PICK_EVEN_LANES);
    const __m512 _te2 = _mm512_shuffle_f32x4(_a2, _b2, PICK_EVEN_LANES);
    const __m512 _be2 = _mm512_shuffle_f32x4(_c2, _d2, PICK_EVEN_LANES);
    const __m512 _te3 = _mm512_shuffle_f32x4(_a3, _b3, PICK_EVEN_LANES);
    const __m512 _be3 = _mm512_shuffle_f32x4(_c3, _d3, PICK_EVEN_LANES);

    const __m512 _to0 = _mm512_shuffle_f32x4(_a0, _b0, PICK_ODD_LANES);
    const __m512 _bo0 = _mm512_shuffle_f32x4(_c0, _d0, PICK_ODD_LANES);
    const __m512 _to1 = _mm512_shuffle_f32x4(_a1, _b1, PICK_ODD_LANES);
    const __m512 _bo1 = _mm512_shuffle_f32x4(_c1, _d1, PICK_ODD_LANES);
    const __m512 _to2 = _mm512_shuffle_f32x4(_a2, _b2, PICK_ODD_LANES);
    const __m512 _bo2 = _mm512_shuffle_f32x4(_c2, _d2, PICK_ODD_LANES);
    const __m512 _to3 = _mm512_shuffle_f32x4(_a3, _b3, PICK_ODD_LANES);
    const __m512 _bo3 = _mm512_shuffle_f32x4(_c3, _d3, PICK_ODD_LANES);

    // Stage 4: a second lane gather joins the top and bottom halves, producing
    // one full output row per register. Outputs are stored in _r0 .. _rf order.
    _r0 = _mm512_castps_si512(_mm512_shuffle_f32x4(_te0, _be0, PICK_EVEN_LANES));
    _r1 = _mm512_castps_si512(_mm512_shuffle_f32x4(_te1, _be1, PICK_EVEN_LANES));
    _r2 = _mm512_castps_si512(_mm512_shuffle_f32x4(_te2, _be2, PICK_EVEN_LANES));
    _r3 = _mm512_castps_si512(_mm512_shuffle_f32x4(_te3, _be3, PICK_EVEN_LANES));
    _r4 = _mm512_castps_si512(_mm512_shuffle_f32x4(_to0, _bo0, PICK_EVEN_LANES));
    _r5 = _mm512_castps_si512(_mm512_shuffle_f32x4(_to1, _bo1, PICK_EVEN_LANES));
    _r6 = _mm512_castps_si512(_mm512_shuffle_f32x4(_to2, _bo2, PICK_EVEN_LANES));
    _r7 = _mm512_castps_si512(_mm512_shuffle_f32x4(_to3, _bo3, PICK_EVEN_LANES));
    _r8 = _mm512_castps_si512(_mm512_shuffle_f32x4(_te0, _be0, PICK_ODD_LANES));
    _r9 = _mm512_castps_si512(_mm512_shuffle_f32x4(_te1, _be1, PICK_ODD_LANES));
    _ra = _mm512_castps_si512(_mm512_shuffle_f32x4(_te2, _be2, PICK_ODD_LANES));
    _rb = _mm512_castps_si512(_mm512_shuffle_f32x4(_te3, _be3, PICK_ODD_LANES));
    _rc = _mm512_castps_si512(_mm512_shuffle_f32x4(_to0, _bo0, PICK_ODD_LANES));
    _rd = _mm512_castps_si512(_mm512_shuffle_f32x4(_to1, _bo1, PICK_ODD_LANES));
    _re = _mm512_castps_si512(_mm512_shuffle_f32x4(_to2, _bo2, PICK_ODD_LANES));
    _rf = _mm512_castps_si512(_mm512_shuffle_f32x4(_to3, _bo3, PICK_ODD_LANES));
}

static NCNN_FORCEINLINE void transpose16x8_epi32(__m512i& _r0, __m512i& _r1, __m512i& _r2, __m512i& _r3, __m512i& _r4, __m512i& _r5, __m512i& _r6, __m512i& _r7)
{
__m512 _tmp0 = _mm512_unpacklo_ps(_mm512_castsi512_ps(_r0), _mm512_castsi512_ps(_r1));
Expand Down
10 changes: 0 additions & 10 deletions tests/test_convolution_3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

#include "layer/convolution.h"
#include "testutil.h"
#include "cpu.h"

static int test_convolution_vec(int w, int outch, int kernel, int dilation, int stride, int pad, int bias)
{
Expand Down Expand Up @@ -395,15 +394,6 @@ int main()
{
SRAND(7767517);

fprintf(stderr, "ncnn::cpu_support_x86_avx() = %d\n", ncnn::cpu_support_x86_avx());
fprintf(stderr, "ncnn::cpu_support_x86_fma() = %d\n", ncnn::cpu_support_x86_fma());
fprintf(stderr, "ncnn::cpu_support_x86_xop() = %d\n", ncnn::cpu_support_x86_xop());
fprintf(stderr, "ncnn::cpu_support_x86_f16c() = %d\n", ncnn::cpu_support_x86_f16c());
fprintf(stderr, "ncnn::cpu_support_x86_avx2() = %d\n", ncnn::cpu_support_x86_avx2());
fprintf(stderr, "ncnn::cpu_support_x86_avx_vnni() = %d\n", ncnn::cpu_support_x86_avx_vnni());
fprintf(stderr, "ncnn::cpu_support_x86_avx512() = %d\n", ncnn::cpu_support_x86_avx512());
fprintf(stderr, "ncnn::cpu_support_x86_avx512_vnni() = %d\n", ncnn::cpu_support_x86_avx512_vnni());

#if NCNN_INT8
return 0
|| test_convolution_1()
Expand Down

0 comments on commit b575029

Please sign in to comment.