Skip to content

Commit

Permalink
no sse fix
Browse files: browse the repository at this point in the history
  • Loading branch information
nihui committed Nov 29, 2024
1 parent ea43b93 commit f32b4b4
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions src/layer/x86/gemm_int8.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -7956,11 +7956,6 @@ static void transpose_pack_A_tile_fp32_to_int8(const Mat& A, Mat& AT, int i, int

const float scale = scales[i + ii];

// if (max_kk == 32)
// {
// NCNN_LOGE("===== %p %d %f", p0, p0[0], scale);
// }

#if __SSE2__
#if __AVX__
#if __AVX512F__
Expand Down Expand Up @@ -19946,6 +19941,7 @@ static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& BT_tile,

const signed char* pA = pAT;
int kk = 0;
#if __SSE2__
#if __AVX512VNNI__ || __AVXVNNI__
for (; kk + 3 < max_kk; kk += 4)
{
Expand Down Expand Up @@ -19992,6 +19988,7 @@ static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& BT_tile,
pA += 4;
pB += 4;
}
#endif // __SSE2__
for (; kk < max_kk; kk += 1)
{
sum00 += pA[0] * pB[0];
Expand Down Expand Up @@ -20028,6 +20025,7 @@ static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& BT_tile,

const signed char* pA = pAT;
int kk = 0;
#if __SSE2__
#if __AVX512VNNI__ || __AVXVNNI__
for (; kk + 3 < max_kk; kk += 4)
{
Expand Down Expand Up @@ -20060,6 +20058,7 @@ static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& BT_tile,
pA += 4;
pB += 2;
}
#endif // __SSE2__
for (; kk < max_kk; kk += 1)
{
sum0 += pA[0] * pB[0];
Expand Down Expand Up @@ -20346,6 +20345,7 @@ static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& BT_tile,

const signed char* pA = pAT;
int kk = 0;
#if __SSE2__
#if __AVX512VNNI__ || __AVXVNNI__
for (; kk + 3 < max_kk; kk += 4)
{
Expand Down Expand Up @@ -20377,6 +20377,7 @@ static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& BT_tile,
pA += 2;
pB += 4;
}
#endif // __SSE2__
for (; kk < max_kk; kk += 1)
{
sum0 += pA[0] * pB[0];
Expand Down

0 comments on commit f32b4b4

Please sign in to comment.