Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Riscv64 c906 d1 #3177

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open

Conversation

yaobyPerfxlab
Copy link
Contributor

No description provided.

@codecov-commenter
Copy link

codecov-commenter commented Aug 16, 2021

Codecov Report

Merging #3177 (a926e61) into master (169614f) will increase coverage by 0.43%.
The diff coverage is 100.00%.

❗ Current head a926e61 differs from the pull request's most recent head 7cdbce0. Consider uploading reports for commit 7cdbce0 to get more accurate results.
Impacted file tree graph

@@            Coverage Diff             @@
##           master    #3177      +/-   ##
==========================================
+ Coverage   90.34%   90.77%   +0.43%     
==========================================
  Files         510      465      -45     
  Lines      136094   108462   -27632     
==========================================
- Hits       122950    98458   -24492     
+ Misses      13144    10004    -3140     
Impacted Files Coverage Δ
src/layer/riscv/convolution_sgemm_packn_fp16s.h 98.65% <100.00%> (+0.04%) ⬆️
src/layer/riscv/flatten_riscv.cpp 89.91% <0.00%> (-4.83%) ⬇️
src/layer/crop.cpp 79.17% <0.00%> (-3.79%) ⬇️
src/layer/x86/pooling_x86.cpp 92.15% <0.00%> (-3.30%) ⬇️
src/layer/x86/relu_x86.cpp 74.19% <0.00%> (-3.17%) ⬇️
src/allocator.cpp 73.98% <0.00%> (-2.78%) ⬇️
src/layer/x86/convolution_3x3.h 23.94% <0.00%> (-2.77%) ⬇️
src/layer/x86/reshape_x86.cpp 92.43% <0.00%> (-2.66%) ⬇️
src/layer/riscv/deconvolution_packnto1.h 97.95% <0.00%> (-2.05%) ⬇️
src/layer/x86/flatten_x86.cpp 94.76% <0.00%> (-1.80%) ⬇️
... and 239 more

Continue to review full report at Codecov.

Legend - Click here to learn more
Δ = absolute <relative> (impact), ø = not affected, ? = missing data
Powered by Codecov. Last update 224040e...7cdbce0. Read the comment docs.

yaobyPerfxlab and others added 6 commits August 16, 2021 17:53
replace vfmacc_vf with vfmacc_vv for better performance
replace vfmacc_vf with vfmacc_vv for better performance
replace vfmacc_vf with vfmacc_vv for better performance
Comment on lines +259 to +305
#if RVV_SPEC_0_7
vfloat16m1_t _v0 = vle16_v_f16m1(tmpptr, vl);
vfloat16m1_t _val0 = vrgathervx_float16xm1(_v0, 0, vl);
vfloat16m1_t _val1 = vrgathervx_float16xm1(_v0, 1, vl);
vfloat16m1_t _val2 = vrgathervx_float16xm1(_v0, 2, vl);
vfloat16m1_t _val3 = vrgathervx_float16xm1(_v0, 3, vl);
vfloat16m1_t _val4 = vrgathervx_float16xm1(_v0, 4, vl);
vfloat16m1_t _val5 = vrgathervx_float16xm1(_v0, 5, vl);
vfloat16m1_t _val6 = vrgathervx_float16xm1(_v0, 6, vl);
vfloat16m1_t _val7 = vrgathervx_float16xm1(_v0, 7, vl);
tmpptr += 8;

vfloat16m1_t _w0 = vle16_v_f16m1(kptr0, vl);
_sum0 = vfmacc_vv_f16m1(_sum0, _val0, _w0, vl);
_sum1 = vfmacc_vv_f16m1(_sum1, _val1, _w0, vl);
_sum2 = vfmacc_vv_f16m1(_sum2, _val2, _w0, vl);
_sum3 = vfmacc_vv_f16m1(_sum3, _val3, _w0, vl);
_sum4 = vfmacc_vv_f16m1(_sum4, _val4, _w0, vl);
_sum5 = vfmacc_vv_f16m1(_sum5, _val5, _w0, vl);
_sum6 = vfmacc_vv_f16m1(_sum6, _val6, _w0, vl);
_sum7 = vfmacc_vv_f16m1(_sum7, _val7, _w0, vl);

kptr0 += packn;
#else
vfloat16m1_t _v0 = vle16_v_f16m1(tmpptr, vl);
vfloat16m1_t _val0 = vrgather_vx_f16m1(_v0, 0, vl);
vfloat16m1_t _val1 = vrgather_vx_f16m1(_v0, 1, vl);
vfloat16m1_t _val2 = vrgather_vx_f16m1(_v0, 2, vl);
vfloat16m1_t _val3 = vrgather_vx_f16m1(_v0, 3, vl);
vfloat16m1_t _val4 = vrgather_vx_f16m1(_v0, 4, vl);
vfloat16m1_t _val5 = vrgather_vx_f16m1(_v0, 5, vl);
vfloat16m1_t _val6 = vrgather_vx_f16m1(_v0, 6, vl);
vfloat16m1_t _val7 = vrgather_vx_f16m1(_v0, 7, vl);
tmpptr += 8;

vfloat16m1_t _w0 = vle16_v_f16m1(kptr0, vl);
_sum0 = vfmacc_vf_f16m1(_sum0, val0, _w0, vl);
_sum1 = vfmacc_vf_f16m1(_sum1, val1, _w0, vl);
_sum2 = vfmacc_vf_f16m1(_sum2, val2, _w0, vl);
_sum3 = vfmacc_vf_f16m1(_sum3, val3, _w0, vl);
_sum4 = vfmacc_vf_f16m1(_sum4, val4, _w0, vl);
_sum5 = vfmacc_vf_f16m1(_sum5, val5, _w0, vl);
_sum6 = vfmacc_vf_f16m1(_sum6, val6, _w0, vl);
_sum7 = vfmacc_vf_f16m1(_sum7, val7, _w0, vl);
_sum0 = vfmacc_vv_f16m1(_sum0, _val0, _w0, vl);
_sum1 = vfmacc_vv_f16m1(_sum1, _val1, _w0, vl);
_sum2 = vfmacc_vv_f16m1(_sum2, _val2, _w0, vl);
_sum3 = vfmacc_vv_f16m1(_sum3, _val3, _w0, vl);
_sum4 = vfmacc_vv_f16m1(_sum4, _val4, _w0, vl);
_sum5 = vfmacc_vv_f16m1(_sum5, _val5, _w0, vl);
_sum6 = vfmacc_vv_f16m1(_sum6, _val6, _w0, vl);
_sum7 = vfmacc_vv_f16m1(_sum7, _val7, _w0, vl);

kptr0 += packn;
#endif
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Stick to the rvv-1.0 spec for intrinsic code, and define compatibility aliases for rvv-0.7 in riscv_v_071_fix.h

@nihui nihui closed this Oct 11, 2023
@nihui nihui reopened this Oct 11, 2023
@github-actions github-actions bot added the riscv label Oct 11, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants