Commit

fix++
nihui committed Oct 21, 2024
1 parent 302be4b commit 338a5f9
Showing 4 changed files with 15 additions and 15 deletions.
8 changes: 4 additions & 4 deletions src/layer/arm/gemm_int8.h
@@ -1724,8 +1724,8 @@ static void compute_A_tile_fp32_int8_scales(const Mat& A, Mat& scales, float B_s
 
     const float v127_B_scale = 127.f * B_scale;
 
-    float* ps = scales;
-    float* pods = out_descales;
+    float* ps = (float*)scales + i;
+    float* pods = (float*)out_descales + i;
 
 #if __ARM_NEON
     if (elempack == 4)
@@ -2750,8 +2750,8 @@ static void transpose_compute_A_tile_fp32_int8_scales(const Mat& A, Mat& scales,
 #endif
 #endif
 
-    float* ps = scales;
-    float* pods = out_descales;
+    float* ps = (float*)scales + i;
+    float* pods = (float*)out_descales + i;
 
 #if __ARM_NEON
     if (elempack == 4)
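
Both hunks above fix the same bug: ps and pods pointed at row 0 of scales and out_descales regardless of which tile was being processed, so every tile overwrote the scales written by the previous one. The + i offset (i being the tile's starting row, per the surrounding code) directs each tile at its own rows. A minimal standalone sketch of the intended pattern, not ncnn's actual implementation (the absmax loop and descale formula here are illustrative):

#include <algorithm>
#include <cmath>
#include <vector>

// Minimal sketch of a tiled per-row scale computation, NOT ncnn's code:
// each call handles rows [i, i + max_ii) of A and must write at offset i.
static void compute_tile_scales_sketch(const float* A, int K,
                                       std::vector<float>& scales,
                                       std::vector<float>& out_descales,
                                       float B_scale, int i, int max_ii)
{
    const float v127_B_scale = 127.f * B_scale;

    // The bug: "float* ps = scales.data();" made every tile write row 0,
    // so only the last tile's scales survived. Offsetting by i fixes it.
    float* ps = scales.data() + i;
    float* pods = out_descales.data() + i;

    for (int ii = 0; ii < max_ii; ii++)
    {
        const float* p = A + (i + ii) * K;
        float absmax = 0.f;
        for (int k = 0; k < K; k++)
            absmax = std::max(absmax, std::fabs(p[k]));

        const float scale = absmax == 0.f ? 1.f : 127.f / absmax;
        ps[ii] = scale;
        pods[ii] = 1.f / (scale * v127_B_scale); // illustrative descale
    }
}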
8 changes: 4 additions & 4 deletions src/layer/arm/gemm_int8_bf16s.h
@@ -38,8 +38,8 @@ static void compute_A_tile_bf16_int8_scales(const Mat& A, Mat& scales, float B_s
 
     const float v127_B_scale = 127.f * B_scale;
 
-    float* ps = scales;
-    float* pods = out_descales;
+    float* ps = (float*)scales + i;
+    float* pods = (float*)out_descales + i;
 
 #if __ARM_NEON
     if (elempack == 4)
@@ -1121,8 +1121,8 @@ static void transpose_compute_A_tile_bf16_int8_scales(const Mat& A, Mat& scales,
 #endif
 #endif
 
-    float* ps = scales;
-    float* pods = out_descales;
+    float* ps = (float*)scales + i;
+    float* pods = (float*)out_descales + i;
 
 #if __ARM_NEON
     if (elempack == 4)
8 changes: 4 additions & 4 deletions src/layer/arm/gemm_int8_fp16s.h
@@ -52,8 +52,8 @@ static void compute_A_tile_fp16_int8_scales(const Mat& A, Mat& scales, float B_s
 
     const float v127_B_scale = 127.f * B_scale;
 
-    float* ps = scales;
-    float* pods = out_descales;
+    float* ps = (float*)scales + i;
+    float* pods = (float*)out_descales + i;
 
 #if __ARM_NEON
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
@@ -1471,8 +1471,8 @@ static void transpose_compute_A_tile_fp16_int8_scales(const Mat& A, Mat& scales,
 #endif
 #endif
 
-    float* ps = scales;
-    float* pods = out_descales;
+    float* ps = (float*)scales + i;
+    float* pods = (float*)out_descales + i;
 
 #if __ARM_NEON
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
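
The same two-hunk fix lands in all three element-type variants (fp32, bf16, fp16) because each header duplicates the scale computation. It is also worth noting why the buggy lines compiled cleanly: ncnn::Mat has a templated operator T*() that converts implicitly to a raw pointer, so float* ps = scales; silently aliased row 0. A hedged illustration of the pitfall using a simplified stand-in for Mat:

#include <cstdio>

// Simplified stand-in for ncnn::Mat's implicit pointer conversion.
struct MiniMat
{
    float data[8];
    template <typename T>
    operator T*() { return (T*)data; } // mirrors ncnn::Mat::operator T*()
};

int main()
{
    MiniMat scales = {};
    const int i = 4; // pretend this tile starts at row 4

    float* before = scales;            // legal, but always points at row 0
    float* after = (float*)scales + i; // the fix: offset to this tile's rows

    std::printf("offset between fixed and buggy pointer: %td\n", after - before);
    return 0;
}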
6 changes: 3 additions & 3 deletions tests/test_multiheadattention_1.cpp
@@ -55,7 +55,7 @@ static int test_multiheadattention_int8(const ncnn::Mat& q, const ncnn::Mat& k,
         as.push_back(RandomMat(k.h, q.h));
     }
 
-    float epsilon = 0.15;
+    float epsilon = 0.1;
 
     int ret = test_layer("MultiHeadAttention", pd, weights, as, 1, epsilon);
     if (ret != 0)
@@ -98,7 +98,7 @@ static int test_multiheadattention_int8_samekv(const ncnn::Mat& q, const ncnn::M
     as[0] = q;
     as[1] = kv;
 
-    float epsilon = 0.15;
+    float epsilon = 0.1;
 
     int ret = test_layer("MultiHeadAttention", pd, weights, as, 1, epsilon);
     if (ret != 0)
@@ -139,7 +139,7 @@ static int test_multiheadattention_int8_sameqkv(const ncnn::Mat& a, int embed_di
     std::vector<ncnn::Mat> as(1);
     as[0] = a;
 
-    float epsilon = 0.15;
+    float epsilon = 0.1;
 
     int ret = test_layer("MultiHeadAttention", pd, weights, as, 1, epsilon);
     if (ret != 0)
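
With per-tile scales no longer clobbered, the int8 path is more accurate, which is presumably why the test tolerance can tighten from 0.15 to 0.1. A rough sketch of the kind of element-wise comparison a test harness performs against epsilon (an assumption for illustration; ncnn's actual test_layer comparison logic may differ):

#include <algorithm>
#include <cmath>

// Hedged sketch of an epsilon-based output comparison; not ncnn's real
// checker, just the common relative-error-with-a-floor testing pattern.
static bool allclose(const float* out, const float* ref, int n, float epsilon)
{
    for (int i = 0; i < n; i++)
    {
        const float diff = std::fabs(out[i] - ref[i]);
        const float denom = std::max(std::fabs(ref[i]), 1.f); // absolute floor
        if (diff / denom > epsilon)
            return false; // mismatch beyond tolerance
    }
    return true;
}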