-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[OpenCL] fuse conv prelu pass #5461
Changes from all commits
f04917b
f79d766
718c4d0
8d1c252
f071852
9a386fd
3be4f79
8fd713c
3c322d9
2925e0f
fdc7dac
10add2e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,8 @@ __kernel void conv2d_1x1_opt( | |
__private const int input_height, /* of one block */ | ||
__private const int output_width, | ||
__private const int output_height, | ||
__private const int old_w) { | ||
__private const int old_w, | ||
__read_only image2d_t prelu_alpha) { | ||
|
||
const int out_c = get_global_id(0); | ||
const int out_w = get_global_id(1); | ||
|
@@ -251,10 +252,33 @@ __kernel void conv2d_1x1_opt( | |
READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0)); | ||
#endif | ||
|
||
output0 = activation_type4(output0); | ||
output1 = activation_type4(output1); | ||
output2 = activation_type4(output2); | ||
output3 = activation_type4(output3); | ||
CL_DTYPE4 alpha0,alpha1,alpha2,alpha3; | ||
#ifdef PRELU_CH //{ | ||
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(out_c, 0)); | ||
alpha1 = alpha0; | ||
alpha2 = alpha0; | ||
alpha3 = alpha0; | ||
//} | ||
#elif defined(PRELU_ELE) //{ | ||
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos0); | ||
alpha1 = alpha0; | ||
alpha2 = alpha0; | ||
alpha3 = alpha0; | ||
//} | ||
#elif defined(PRELU_ALL) //{ | ||
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0)); | ||
alpha0.y = alpha0.x; | ||
alpha0.z = alpha0.x; | ||
alpha0.w = alpha0.x; | ||
alpha1 = alpha0; | ||
alpha2 = alpha0; | ||
alpha3 = alpha0; | ||
//} | ||
#endif | ||
output0 = activation_type4(output0, alpha0); | ||
output1 = activation_type4(output1, alpha1); | ||
output2 = activation_type4(output2, alpha2); | ||
output3 = activation_type4(output3, alpha3); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 考虑能否把该值融合到 activation_type 和 activation_type4 中,以增加默认参数的方式。这个与 FUSE_SCALE_ACT 还有些不同,后者(FUSE_SCALE)不仅在 conv kernel 中有用到,在 element kernel 中也有用到。 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 我看原来的实现中有 prelu 的选项:如果有 prelu 定义时添加一个参数,如果没有就只有一个参数。你是说把第二个参数设置成默认就含有,是吗? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 对的,第二个参数默认就含有。 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 已修改。 |
||
|
||
#ifdef SCALE_ACTIVATION | ||
output0 = fuse_scale(output0, 1.f, 0.f, 0.f); | ||
|
@@ -301,7 +325,8 @@ __kernel void conv2d_1x1_simple( | |
__private const int input_height, /* of one block */ | ||
__private const int output_width, | ||
__private const int output_height, | ||
__private const int old_w) { | ||
__private const int old_w, | ||
__read_only image2d_t prelu_alpha) { | ||
const int out_c = get_global_id(0); | ||
const int out_w = get_global_id(1); | ||
const int out_nh = get_global_id(2); | ||
|
@@ -421,10 +446,33 @@ __kernel void conv2d_1x1_simple( | |
READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0)); | ||
#endif | ||
|
||
output0 = activation_type4(output0); | ||
output1 = activation_type4(output1); | ||
output2 = activation_type4(output2); | ||
output3 = activation_type4(output3); | ||
CL_DTYPE4 alpha0,alpha1,alpha2,alpha3; | ||
#ifdef PRELU_CH //{ | ||
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(out_c, 0)); | ||
alpha1 = alpha0; | ||
alpha2 = alpha0; | ||
alpha3 = alpha0; | ||
//} | ||
#elif defined(PRELU_ELE) //{ | ||
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos0); | ||
alpha1 = alpha0; | ||
alpha2 = alpha0; | ||
alpha3 = alpha0; | ||
//} | ||
#elif defined(PRELU_ALL) //{ | ||
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0)); | ||
alpha0.y = alpha0.x; | ||
alpha0.z = alpha0.x; | ||
alpha0.w = alpha0.x; | ||
alpha1 = alpha0; | ||
alpha2 = alpha0; | ||
alpha3 = alpha0; | ||
//} | ||
#endif | ||
output0 = activation_type4(output0, alpha0); | ||
output1 = activation_type4(output1, alpha1); | ||
output2 = activation_type4(output2, alpha2); | ||
output3 = activation_type4(output3, alpha3); | ||
|
||
#ifdef SCALE_ACTIVATION | ||
output0 = fuse_scale(output0, 1.f, 0.f, 0.f); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这个是 out_tensor_w 吗?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这个是 prelu 的参数,是一个 tensor。之前的 act 里面只要添加 attribute 就好了,而 prelu 融合需要添加一个新的输入。