[QualcommQnn] support models #9378

Merged: 3 commits, Aug 31, 2022
Changes from all commits
@@ -553,7 +553,7 @@ void NCHW2NHWCDataLayoutConverter::ConvertActivation(
auto& output_operands = operation->output_operands;
auto input_count = input_operands.size();
auto output_count = output_operands.size();
-  NNADAPTER_CHECK_EQ(input_count, 1);
+  NNADAPTER_CHECK_GE(input_count, 1);
NNADAPTER_CHECK_EQ(output_count, 1);
auto input_operand = input_operands[0];
auto output_operand = output_operands[0];
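The relaxed check is presumably there because some activations carry extra scalar operands (hard_sigmoid's alpha/beta, for example), so the input count can exceed one even though only the first operand is a feature map whose layout needs converting. A minimal self-contained sketch of the pattern, with stand-in types rather than the actual NNAdapter code:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative stand-ins for NNAdapter's core types.
struct Operand {
  std::vector<int32_t> dimensions;
};
struct Operation {
  std::vector<Operand*> input_operands;
  std::vector<Operand*> output_operands;
};

// Why CHECK_GE replaces CHECK_EQ: an activation may carry extra scalar
// operands, so input_count can exceed 1, but only input_operands[0] is a
// feature map whose layout matters.
void ConvertActivationSketch(Operation* operation) {
  assert(operation->input_operands.size() >= 1);  // was: == 1
  assert(operation->output_operands.size() == 1);
  Operand* input_operand = operation->input_operands[0];
  Operand* output_operand = operation->output_operands[0];
  // The output inherits the (possibly NHWC-permuted) input dimensions;
  // trailing parameter operands are left untouched.
  output_operand->dimensions = input_operand->dimensions;
}
```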
10 changes: 5 additions & 5 deletions lite/kernels/nnadapter/converter/all.h
@@ -241,14 +241,14 @@ REGISTER_CONVERTER(gelu,
ConvertGelu,
"huawei_ascend_npu,huawei_kirin_npu,kunlunxin_xtcl,"
"cambricon_mlu,intel_openvino,qualcomm_qnn");
-REGISTER_CONVERTER(
-    hard_sigmoid,
-    ConvertHardSigmoid,
-    "huawei_ascend_npu,huawei_kirin_npu,verisilicon_timvx,intel_openvino");
+REGISTER_CONVERTER(hard_sigmoid,
+                   ConvertHardSigmoid,
+                   "huawei_ascend_npu,huawei_kirin_npu,verisilicon_timvx,intel_"
+                   "openvino,qualcomm_qnn");
REGISTER_CONVERTER(hard_swish,
ConvertHardSwish,
"huawei_ascend_npu,huawei_kirin_npu,verisilicon_timvx,"
"nvidia_tensorrt,intel_openvino,eeasytech_npu");
"nvidia_tensorrt,intel_openvino,eeasytech_npu,qualcomm_qnn");
REGISTER_CONVERTER(arg_max,
ConvertArgMinMax,
"huawei_ascend_npu,huawei_kirin_npu,nvidia_tensorrt,"
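For context, REGISTER_CONVERTER binds a Paddle op type to its converter and to a comma-separated allowlist of NNAdapter devices, so appending qualcomm_qnn to the string is what enables the op on QNN. A rough sketch of how such a registry could be structured; names and internals are illustrative, not Paddle-Lite's actual macro expansion:

```cpp
#include <functional>
#include <map>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical registry: op type -> converter plus a device allowlist parsed
// from the comma-separated string.
struct ConverterEntry {
  std::function<void()> converter;  // the real converter takes a context + op
  std::vector<std::string> devices;
};

inline std::map<std::string, ConverterEntry>& ConverterRegistry() {
  static std::map<std::string, ConverterEntry> registry;
  return registry;
}

inline bool RegisterConverter(const std::string& op_type,
                              std::function<void()> converter,
                              const std::string& device_csv) {
  ConverterEntry entry{std::move(converter), {}};
  std::stringstream ss(device_csv);
  for (std::string device; std::getline(ss, device, ',');)
    entry.devices.push_back(device);
  ConverterRegistry()[op_type] = std::move(entry);
  return true;
}
```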
3 changes: 3 additions & 0 deletions lite/tests/api/test_bisenet_fp32_v2_3_nnadapter.cc
@@ -47,6 +47,9 @@ TEST(bisenet, test_bisenet_fp32_v2_3_nnadapter) {
#elif defined(NNADAPTER_WITH_INTEL_OPENVINO)
nnadapter_device_names.emplace_back("intel_openvino");
// TODO(hong19860320) Fix core dump
+  // 1. The model (split at relu_8.tmp_0) core dumps (android htp fp16).
+  // 2. Error: "A single op (1e7f00000017) requires 0x704800 bytes of TCM, which
+  //    is greater than the TCM size of 0x400000!" The shape seems to be too large.
// #elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
// nnadapter_device_names.emplace_back("qualcomm_qnn");
#else
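For scale, 0x704800 is 7,358,464 bytes, roughly 7.0 MiB, against a TCM budget of 0x400000 = 4 MiB, so the op overshoots the on-chip memory by about 75%. A quick check:

```cpp
#include <cstdio>

int main() {
  constexpr unsigned required_tcm = 0x704800;  // from the error message
  constexpr unsigned total_tcm = 0x400000;     // 4 MiB
  std::printf("required:  %u bytes (%.2f MiB)\n",
              required_tcm, required_tcm / 1048576.0);
  std::printf("available: %u bytes (%.2f MiB)\n",
              total_tcm, total_tcm / 1048576.0);
  // required:  7358464 bytes (7.02 MiB)
  // available: 4194304 bytes (4.00 MiB)
  return 0;
}
```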
@@ -30,6 +30,8 @@ namespace lite {

TEST(ch_ppocr_mobile_v2_0_det,
test_ch_ppocr_mobile_v2_0_det_fp32_v2_3_nnadapter) {
+  FLAGS_warmup = 1;
+  bool prepare_before_timing = true;
std::vector<std::string> nnadapter_device_names;
std::string nnadapter_context_properties;
std::vector<paddle::lite_api::Place> valid_places;
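FLAGS_warmup and FLAGS_iteration are presumably gflags shared across these API tests; hoisting FLAGS_warmup = 1 to the top of the test body gives every device branch a common default that the qualcomm_qnn branch can then override. A sketch of the assumed declarations; the actual definitions live elsewhere in the test harness:

```cpp
#include <gflags/gflags.h>

// Presumed declarations from the shared test utilities (illustrative):
DECLARE_int32(warmup);     // warmup runs before the timed loop
DECLARE_int32(iteration);  // number of timed runs
```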
@@ -50,9 +52,13 @@ TEST(ch_ppocr_mobile_v2_0_det,
nnadapter_device_names.emplace_back("huawei_kirin_npu");
#elif defined(NNADAPTER_WITH_INTEL_OPENVINO)
nnadapter_device_names.emplace_back("intel_openvino");
-  // TODO(hong19860320) Fix timeout
-  // #elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
-  //   nnadapter_device_names.emplace_back("qualcomm_qnn");
+#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
+  nnadapter_device_names.emplace_back("qualcomm_qnn");
+  // 1. Dynamic shape is not supported.
+  // 2. Reduce execution time.
+  FLAGS_iteration = 1;
+  FLAGS_warmup = 0;
+  prepare_before_timing = false;
#else
LOG(INFO) << "Unsupported NNAdapter device!";
return;
@@ -100,7 +106,6 @@ TEST(ch_ppocr_mobile_v2_0_det,
ReadRawData(raw_data_dir, input_names[i], input_shapes[i]));
}

-  FLAGS_warmup = 1;
for (int i = 0; i < FLAGS_warmup; ++i) {
fill_tensor(predictor, 0, raw_data[i].data(), input_shapes[i]);
predictor->Run();
@@ -110,7 +115,7 @@ TEST(ch_ppocr_mobile_v2_0_det,
std::vector<std::vector<float>> results;
for (size_t i = 0; i < raw_data.size(); ++i) {
fill_tensor(predictor, 0, raw_data[i].data(), input_shapes[i]);
-    predictor->Run();
+    if (prepare_before_timing) predictor->Run();

double start = GetCurrentUS();
predictor->Run();
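The prepare_before_timing flag implements a common benchmarking pattern: optional warmup runs, then one untimed run per input so that shape-specific re-preparation stays out of the measurement, then the timed run. On qualcomm_qnn, where dynamic shapes are unsupported and each run is expensive, both are disabled. Condensed from the test body above; only cost_ms is an assumed name:

```cpp
// fill_tensor and GetCurrentUS are the helpers already used by these tests.
for (int i = 0; i < FLAGS_warmup; ++i) {  // 1 by default, 0 on qualcomm_qnn
  fill_tensor(predictor, 0, raw_data[i].data(), input_shapes[i]);
  predictor->Run();
}

double cost_ms = 0;
for (size_t i = 0; i < raw_data.size(); ++i) {
  fill_tensor(predictor, 0, raw_data[i].data(), input_shapes[i]);
  // Untimed pass so per-shape re-preparation is excluded from the timing;
  // skipped on qualcomm_qnn to keep the test fast.
  if (prepare_before_timing) predictor->Run();
  double start = GetCurrentUS();
  predictor->Run();                              // the measured run
  cost_ms += (GetCurrentUS() - start) / 1000.0;  // assumed us -> ms conversion
}
```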
@@ -30,6 +30,9 @@ namespace lite {

TEST(ch_ppocr_mobile_v2_0_rec,
test_ch_ppocr_mobile_v2_0_rec_fp32_v2_3_nnadapter) {
+  FLAGS_warmup = 1;
+  bool prepare_before_timing = true;
+  std::string nnadapter_subgraph_partition_config_buffer;
std::vector<std::string> nnadapter_device_names;
std::string nnadapter_context_properties;
std::vector<paddle::lite_api::Place> valid_places;
@@ -48,9 +51,19 @@ TEST(ch_ppocr_mobile_v2_0_rec,
nnadapter_context_properties = "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0";
#elif defined(NNADAPTER_WITH_INTEL_OPENVINO)
nnadapter_device_names.emplace_back("intel_openvino");
-  // TODO(hong19860320) Fix core dump
-  // #elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
-  //   nnadapter_device_names.emplace_back("qualcomm_qnn");
+#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
+  nnadapter_device_names.emplace_back("qualcomm_qnn");
+  // 1. Dynamic shape is not supported.
+  // 2. Reduce execution time.
+  FLAGS_iteration = 1;
+  FLAGS_warmup = 0;
+  prepare_before_timing = false;
+  // TODO(zhupengyang): The last matmul is not supported on htp+fp16.
+  nnadapter_subgraph_partition_config_buffer =
+      "transpose2:lstm_0.tmp_0:transpose_2.tmp_0,transpose_2.tmp_1\n"
+      "matmul:transpose_2.tmp_0,ctc_fc_w_attr:ctc_fc.tmp_0\n"
+      "elementwise_add:ctc_fc.tmp_0,ctc_fc_b_attr:ctc_fc.tmp_1\n"
+      "softmax:ctc_fc.tmp_1:save_infer_model/scale_0.tmp_1";
#else
LOG(INFO) << "Unsupported NNAdapter device!";
return;
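The buffer appears to use NNAdapter's custom subgraph partition format: one op per line, op_type:inputs:outputs with comma-separated variable lists, and each matching op is excluded from the device subgraph. An annotated reading under that assumption:

```cpp
// Each line names one op to keep off the NNAdapter subgraph; here the tail
// of the CTC head, which htp+fp16 cannot run, is carved out.
const char* kRecPartitionConfig =
    "transpose2:lstm_0.tmp_0:transpose_2.tmp_0,transpose_2.tmp_1\n"  // excluded
    "matmul:transpose_2.tmp_0,ctc_fc_w_attr:ctc_fc.tmp_0\n"          // excluded
    "elementwise_add:ctc_fc.tmp_0,ctc_fc_b_attr:ctc_fc.tmp_1\n"      // excluded
    "softmax:ctc_fc.tmp_1:save_infer_model/scale_0.tmp_1";           // excluded
```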
@@ -62,6 +75,8 @@ TEST(ch_ppocr_mobile_v2_0_rec,
cxx_config.set_valid_places(valid_places);
cxx_config.set_nnadapter_device_names(nnadapter_device_names);
cxx_config.set_nnadapter_context_properties(nnadapter_context_properties);
+  cxx_config.set_nnadapter_subgraph_partition_config_buffer(
+      nnadapter_subgraph_partition_config_buffer);
predictor = lite_api::CreatePaddlePredictor(cxx_config);
predictor->SaveOptimizedModel(FLAGS_model_dir,
paddle::lite_api::LiteModelType::kNaiveBuffer);
@@ -103,7 +118,6 @@ TEST(ch_ppocr_mobile_v2_0_rec,
ReadRawData(raw_data_dir, input_names[i], input_shapes[i]));
}

-  FLAGS_warmup = 1;
for (int i = 0; i < FLAGS_warmup; ++i) {
fill_tensor(predictor, 0, raw_data[i].data(), input_shapes[i]);
predictor->Run();
@@ -113,7 +127,7 @@ TEST(ch_ppocr_mobile_v2_0_rec,
std::vector<std::vector<float>> results;
for (size_t i = 0; i < raw_data.size(); ++i) {
fill_tensor(predictor, 0, raw_data[i].data(), input_shapes[i]);
-    predictor->Run();
+    if (prepare_before_timing) predictor->Run();

double start = GetCurrentUS();
predictor->Run();
@@ -30,6 +30,8 @@ namespace lite {

TEST(ch_ppocr_server_v2_0_det,
test_ch_ppocr_server_v2_0_det_fp32_v2_3_nnadapter) {
+  FLAGS_warmup = 1;
+  bool prepare_before_timing = true;
std::vector<std::string> nnadapter_device_names;
std::string nnadapter_context_properties;
std::vector<paddle::lite_api::Place> valid_places;
@@ -50,9 +52,13 @@ TEST(ch_ppocr_server_v2_0_det,
nnadapter_device_names.emplace_back("huawei_kirin_npu");
#elif defined(NNADAPTER_WITH_INTEL_OPENVINO)
nnadapter_device_names.emplace_back("intel_openvino");
-  // TODO(hong19860320) Fix timeout
-  // #elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
-  //   nnadapter_device_names.emplace_back("qualcomm_qnn");
+#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
+  nnadapter_device_names.emplace_back("qualcomm_qnn");
+  // 1. Dynamic shape is not supported.
+  // 2. Reduce execution time.
+  FLAGS_iteration = 1;
+  FLAGS_warmup = 0;
+  prepare_before_timing = false;
#else
LOG(INFO) << "Unsupported NNAdapter device!";
return;
@@ -100,7 +106,6 @@ TEST(ch_ppocr_server_v2_0_det,
ReadRawData(raw_data_dir, input_names[i], input_shapes[i]));
}

-  FLAGS_warmup = 1;
for (int i = 0; i < FLAGS_warmup; ++i) {
fill_tensor(predictor, 0, raw_data[i].data(), input_shapes[i]);
predictor->Run();
@@ -110,7 +115,7 @@ TEST(ch_ppocr_server_v2_0_det,
std::vector<std::vector<float>> results;
for (size_t i = 0; i < raw_data.size(); ++i) {
fill_tensor(predictor, 0, raw_data[i].data(), input_shapes[i]);
-    predictor->Run();
+    if (prepare_before_timing) predictor->Run();

double start = GetCurrentUS();
predictor->Run();
@@ -30,6 +30,9 @@ namespace lite {

TEST(ch_ppocr_server_v2_0_rec,
test_ch_ppocr_server_v2_0_rec_fp32_v2_3_nnadapter) {
+  FLAGS_warmup = 1;
+  bool prepare_before_timing = true;
+  std::string nnadapter_subgraph_partition_config_buffer;
std::vector<std::string> nnadapter_device_names;
std::string nnadapter_context_properties;
std::vector<paddle::lite_api::Place> valid_places;
@@ -48,9 +51,19 @@ TEST(ch_ppocr_server_v2_0_rec,
nnadapter_context_properties = "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0";
#elif defined(NNADAPTER_WITH_INTEL_OPENVINO)
nnadapter_device_names.emplace_back("intel_openvino");
-  // TODO(hong19860320) Fix core dump
-  // #elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
-  //   nnadapter_device_names.emplace_back("qualcomm_qnn");
+#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
+  nnadapter_device_names.emplace_back("qualcomm_qnn");
+  // 1. Dynamic shape is not supported.
+  // 2. Reduce execution time.
+  FLAGS_iteration = 1;
+  FLAGS_warmup = 0;
+  prepare_before_timing = false;
+  // TODO(zhupengyang): The last matmul is not supported on htp+fp16.
+  nnadapter_subgraph_partition_config_buffer =
+      "transpose2:lstm_0.tmp_0:transpose_2.tmp_0,transpose_2.tmp_1\n"
+      "matmul:transpose_2.tmp_0,ctc_fc_w_attr:ctc_fc.tmp_0\n"
+      "elementwise_add:ctc_fc.tmp_0,ctc_fc_b_attr:ctc_fc.tmp_1\n"
+      "softmax:ctc_fc.tmp_1:save_infer_model/scale_0.tmp_1";
#else
LOG(INFO) << "Unsupported NNAdapter device!";
return;
@@ -62,6 +75,8 @@ TEST(ch_ppocr_server_v2_0_rec,
cxx_config.set_valid_places(valid_places);
cxx_config.set_nnadapter_device_names(nnadapter_device_names);
cxx_config.set_nnadapter_context_properties(nnadapter_context_properties);
+  cxx_config.set_nnadapter_subgraph_partition_config_buffer(
+      nnadapter_subgraph_partition_config_buffer);
predictor = lite_api::CreatePaddlePredictor(cxx_config);
predictor->SaveOptimizedModel(FLAGS_model_dir,
paddle::lite_api::LiteModelType::kNaiveBuffer);
@@ -96,7 +111,6 @@ TEST(ch_ppocr_server_v2_0_rec,
ReadRawData(raw_data_dir, input_names[i], input_shapes[i]));
}

-  FLAGS_warmup = 1;
for (int i = 0; i < FLAGS_warmup; ++i) {
fill_tensor(predictor, 0, raw_data[i].data(), input_shapes[i]);
predictor->Run();
@@ -106,7 +120,7 @@ TEST(ch_ppocr_server_v2_0_rec,
std::vector<std::vector<float>> results;
for (size_t i = 0; i < raw_data.size(); ++i) {
fill_tensor(predictor, 0, raw_data[i].data(), input_shapes[i]);
-    predictor->Run();
+    if (prepare_before_timing) predictor->Run();

double start = GetCurrentUS();
predictor->Run();
15 changes: 10 additions & 5 deletions lite/tests/api/test_ch_ppocr_v2_det_fp32_v2_3_nnadapter.cc
@@ -29,6 +29,8 @@ namespace paddle {
namespace lite {

TEST(ch_ppocr_v2_det, test_ch_ppocr_v2_det_fp32_v2_3_nnadapter) {
+  FLAGS_warmup = 1;
+  bool prepare_before_timing = true;
std::vector<std::string> nnadapter_device_names;
std::string nnadapter_context_properties;
std::vector<paddle::lite_api::Place> valid_places;
@@ -47,9 +49,13 @@ TEST(ch_ppocr_v2_det, test_ch_ppocr_v2_det_fp32_v2_3_nnadapter) {
nnadapter_context_properties = "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0";
#elif defined(NNADAPTER_WITH_INTEL_OPENVINO)
nnadapter_device_names.emplace_back("intel_openvino");
-  // TODO(hong19860320) Fix timeout
-  // #elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
-  //   nnadapter_device_names.emplace_back("qualcomm_qnn");
+#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
+  nnadapter_device_names.emplace_back("qualcomm_qnn");
+  // 1. Dynamic shape is not supported.
+  // 2. Reduce execution time.
+  FLAGS_iteration = 1;
+  FLAGS_warmup = 0;
+  prepare_before_timing = false;
#else
LOG(INFO) << "Unsupported NNAdapter device!";
return;
@@ -97,7 +103,6 @@ TEST(ch_ppocr_v2_det, test_ch_ppocr_v2_det_fp32_v2_3_nnadapter) {
ReadRawData(raw_data_dir, input_names[i], input_shapes[i]));
}

-  FLAGS_warmup = 1;
for (int i = 0; i < FLAGS_warmup; ++i) {
fill_tensor(predictor, 0, raw_data[i].data(), input_shapes[i]);
predictor->Run();
@@ -107,7 +112,7 @@ TEST(ch_ppocr_v2_det, test_ch_ppocr_v2_det_fp32_v2_3_nnadapter) {
std::vector<std::vector<float>> results;
for (size_t i = 0; i < raw_data.size(); ++i) {
fill_tensor(predictor, 0, raw_data[i].data(), input_shapes[i]);
-    predictor->Run();
+    if (prepare_before_timing) predictor->Run();

double start = GetCurrentUS();
predictor->Run();
25 changes: 21 additions & 4 deletions lite/tests/api/test_ch_ppocr_v2_rec_fp32_v2_3_nnadapter.cc
@@ -29,6 +29,9 @@ namespace paddle {
namespace lite {

TEST(ch_ppocr_v2_rec, test_ch_ppocr_v2_rec_fp32_v2_3_nnadapter) {
+  FLAGS_warmup = 1;
+  bool prepare_before_timing = true;
+  std::string nnadapter_subgraph_partition_config_buffer;
std::vector<std::string> nnadapter_device_names;
std::string nnadapter_context_properties;
std::vector<paddle::lite_api::Place> valid_places;
@@ -47,9 +50,21 @@ TEST(ch_ppocr_v2_rec, test_ch_ppocr_v2_rec_fp32_v2_3_nnadapter) {
nnadapter_context_properties = "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0";
#elif defined(NNADAPTER_WITH_INTEL_OPENVINO)
nnadapter_device_names.emplace_back("intel_openvino");
-  // TODO(hong19860320) Fix core dump
-  // #elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
-  //   nnadapter_device_names.emplace_back("qualcomm_qnn");
+#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
+  nnadapter_device_names.emplace_back("qualcomm_qnn");
+  // 1. Dynamic shape is not supported.
+  // 2. Reduce execution time.
+  FLAGS_iteration = 1;
+  FLAGS_warmup = 0;
+  prepare_before_timing = false;
+  // TODO(zhupengyang): The last matmul is not supported on htp+fp16.
+  nnadapter_subgraph_partition_config_buffer =
+      "transpose2:lstm_1.tmp_0:transpose_2.tmp_0,transpose_2.tmp_1\n"
+      "matmul:transpose_2.tmp_0,student_ctc_head_1_w_attr\n"
+      "elementwise_add:student_ctc_head_1.tmp_0,student_ctc_head_1_b_attr\n"
+      "matmul:student_ctc_head_1.tmp_1,student_ctc_head_2_w_attr\n"
+      "elementwise_add:student_ctc_head_2.tmp_0,student_ctc_head_2_b_attr\n"
+      "softmax:student_ctc_head_2.tmp_1:save_infer_model/scale_0.tmp_1";
#else
LOG(INFO) << "Unsupported NNAdapter device!";
return;
@@ -61,6 +76,8 @@ TEST(ch_ppocr_v2_rec, test_ch_ppocr_v2_rec_fp32_v2_3_nnadapter) {
cxx_config.set_valid_places(valid_places);
cxx_config.set_nnadapter_device_names(nnadapter_device_names);
cxx_config.set_nnadapter_context_properties(nnadapter_context_properties);
+  cxx_config.set_nnadapter_subgraph_partition_config_buffer(
+      nnadapter_subgraph_partition_config_buffer);
predictor = lite_api::CreatePaddlePredictor(cxx_config);
predictor->SaveOptimizedModel(FLAGS_model_dir,
paddle::lite_api::LiteModelType::kNaiveBuffer);
@@ -105,7 +122,7 @@ TEST(ch_ppocr_v2_rec, test_ch_ppocr_v2_rec_fp32_v2_3_nnadapter) {
std::vector<std::vector<float>> results;
for (size_t i = 0; i < raw_data.size(); ++i) {
fill_tensor(predictor, 0, raw_data[i].data(), input_shapes[i]);
-    predictor->Run();
+    if (prepare_before_timing) predictor->Run();

double start = GetCurrentUS();
predictor->Run();
5 changes: 2 additions & 3 deletions lite/tests/api/test_darknet53_fp32_v2_3_nnadapter.cc
@@ -55,9 +55,8 @@ TEST(DarkNet, test_darknet53_fp32_v2_3_nnadapter) {
out_accuracy_threshold = 0.75f;
#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
nnadapter_device_names.emplace_back("qualcomm_qnn");
-  FLAGS_iteration = 1;
-  // TODO(hong19860320) Fix precision
-  out_accuracy_threshold = 0.f;
+  FLAGS_iteration = 5;
+  out_accuracy_threshold = 0.6f;
#else
LOG(INFO) << "Unsupported NNAdapter device!";
return;
12 changes: 9 additions & 3 deletions lite/tests/api/test_deepfm_fp32_v2_1_nnadapter.cc
@@ -28,6 +28,7 @@ namespace paddle {
namespace lite {

TEST(deepfm, test_deepfm_fp32_v2_1_nnadapter) {
+  std::string nnadapter_subgraph_partition_config_buffer;
std::vector<std::string> nnadapter_device_names;
std::string nnadapter_context_properties;
std::vector<paddle::lite_api::Place> valid_places;
@@ -46,9 +47,12 @@ TEST(deepfm, test_deepfm_fp32_v2_1_nnadapter) {
nnadapter_context_properties = "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0";
#elif defined(NNADAPTER_WITH_INTEL_OPENVINO)
nnadapter_device_names.emplace_back("intel_openvino");
-  // TODO(hong19860320) Support int64 datatype
-  // #elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
-  //   nnadapter_device_names.emplace_back("qualcomm_qnn");
+#elif defined(NNADAPTER_WITH_QUALCOMM_QNN)
+  nnadapter_device_names.emplace_back("qualcomm_qnn");
+  // The int64 datatype is not supported.
+  nnadapter_subgraph_partition_config_buffer =
+      "concat::concat_0.tmp_0\n"
+      "lookup_table_v2";
#else
LOG(INFO) << "Unsupported NNAdapter device!";
return;
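Here the exclusion works at two granularities, assuming empty fields in the op_type:inputs:outputs triple act as wildcards: a line with tensor qualifiers pins a single op instance, while a bare op type matches every instance. Annotated under that assumption:

```cpp
// "op_type:inputs:outputs" per line; empty fields match any tensor.
const char* kDeepfmPartitionConfig =
    "concat::concat_0.tmp_0\n"  // only the concat producing concat_0.tmp_0
    "lookup_table_v2";          // every embedding lookup (int64 indices)
```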
@@ -60,6 +64,8 @@ TEST(deepfm, test_deepfm_fp32_v2_1_nnadapter) {
cxx_config.set_valid_places(valid_places);
cxx_config.set_nnadapter_device_names(nnadapter_device_names);
cxx_config.set_nnadapter_context_properties(nnadapter_context_properties);
+  cxx_config.set_nnadapter_subgraph_partition_config_buffer(
+      nnadapter_subgraph_partition_config_buffer);
predictor = lite_api::CreatePaddlePredictor(cxx_config);
predictor->SaveOptimizedModel(FLAGS_model_dir,
paddle::lite_api::LiteModelType::kNaiveBuffer);
5 changes: 1 addition & 4 deletions lite/tests/api/utility.h
@@ -32,10 +32,7 @@ T CalOutAccuracy(const std::vector<std::vector<T>>& out,
size_t right_count = 0;
size_t all_count = 0;
for (size_t i = 0; i < out.size(); i++) {
-    if (out[i].size() != ref_out[i].size()) {
-      LOG(FATAL) << "size error: out_size = " << out[i].size()
-                 << ", ref_out_size = " << ref_out[i].size() << ", i = " << i;
-    }
+    CHECK_EQ(out[i].size(), ref_out[i].size()) << "Size error, i: " << i;
for (size_t j = 0; j < out[i].size(); j++) {
if (std::abs(out[i][j] - ref_out[i][j]) < abs_error) {
right_count++;
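CalOutAccuracy evidently returns the fraction of elements that fall within abs_error of the reference, which is what thresholds such as darknet53's out_accuracy_threshold = 0.6f above are compared against. A toy illustration of the assumed call shape:

```cpp
#include <vector>

// 3 of 4 elements fall within abs_error = 0.1 of the reference, so the
// returned accuracy is 0.75, which would pass a 0.6 threshold.
std::vector<std::vector<float>> out = {{1.00f, 2.00f, 3.00f, 9.00f}};
std::vector<std::vector<float>> ref_out = {{1.05f, 2.02f, 2.95f, 4.00f}};
float acc = CalOutAccuracy(out, ref_out, 0.1f);  // assumed (out, ref, abs_error)
```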