Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LITE][NPU][XPU] Refine subgraph pass, and support NPU/XPU model generation at execution time #2576

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
7b014c4
[LITE][ALL] Porting subgraph detector from paddle inference, Refine s…
hong19860320 Dec 2, 2019
70cfd1a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle-Lite…
hong19860320 Dec 2, 2019
02973c0
[LITE][NPU] Support NPU model generation on execution time
hong19860320 Dec 6, 2019
0d5cfa3
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle-Lite…
hong19860320 Dec 6, 2019
07a07bc
[LITE][NPU] Revert the modification of unit test helper of bridges
hong19860320 Dec 6, 2019
92a9f3e
[LITE][XPU] Revert the modification of unit test helper of bridges
hong19860320 Dec 6, 2019
62b84ff
[LITE][XPU] Support XPU model generation at execution time
hong19860320 Dec 10, 2019
2af2af3
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle-Lite…
hong19860320 Dec 10, 2019
d8b8cb5
[LITE][NPU][XPU] Rename the context to the graph for NPU/XPU bridges
hong19860320 Dec 10, 2019
af455d0
[LITE][NPU][XPU] Fix the input param of the unit test of subgraph pass
hong19860320 Dec 10, 2019
2b5a8a9
[LITE][NPU][XPU] Fix the NPU/XPU model generation by using model_opti…
hong19860320 Dec 10, 2019
843c6fb
[LITE][NPU][XPU] Move subgraph bridge registry and engine to kernels/…
hong19860320 Dec 11, 2019
e7e1781
[LITE][NPU][XPU] Refine code including removing unused code and host …
hong19860320 Dec 11, 2019
a483d33
[LITE][NPU][XPU] Use the pointer to access the sub block desc in subg…
hong19860320 Dec 12, 2019
0a1b0f2
[LITE][XPU]Fix the build_extra flag for XPU compiling
hong19860320 Dec 13, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions cmake/lite.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ file(WRITE ${offline_lib_registry_file} "") # clean
function(lite_cc_library TARGET)
set(options SHARED shared STATIC static MODULE module)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS NPU_DEPS XPU_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS LIGHT_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS LIGHT_DEPS
HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

Expand All @@ -128,10 +128,10 @@ function(lite_cc_library TARGET)
X86_DEPS ${args_X86_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS}
NPU_DEPS ${args_NPU_DEPS}
XPU_DEPS ${args_XPU_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
NPU_DEPS ${args_NPU_DEPS}
XPU_DEPS ${args_XPU_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
Expand Down Expand Up @@ -161,7 +161,7 @@ function(lite_cc_binary TARGET)
set(options " -g ")
endif()
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

Expand All @@ -173,6 +173,8 @@ function(lite_cc_binary TARGET)
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
NPU_DEPS ${args_NPU_DEPS}
XPU_DEPS ${args_XPU_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
Expand Down Expand Up @@ -205,7 +207,7 @@ function(lite_cc_test TARGET)
endif()
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
ARGS
COMPILE_LEVEL # (basic|extra)
Expand All @@ -225,6 +227,8 @@ function(lite_cc_test TARGET)
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
NPU_DEPS ${args_NPU_DEPS}
XPU_DEPS ${args_XPU_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
Expand Down Expand Up @@ -267,7 +271,7 @@ endif()
function(add_kernel TARGET device level)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
Expand Down Expand Up @@ -360,11 +364,12 @@ function(add_kernel TARGET device level)
lite_cc_library(${TARGET} SRCS ${args_SRCS}
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
XPU_DEPS ${args_XPU_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
NPU_DEPS ${args_NPU_DEPS}
XPU_DEPS ${args_XPU_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
Expand All @@ -383,7 +388,7 @@ endif()
function(add_operator TARGET level)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
Expand All @@ -409,11 +414,12 @@ function(add_operator TARGET level)
lite_cc_library(${TARGET} SRCS ${args_SRCS}
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
XPU_DEPS ${args_XPU_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
NPU_DEPS ${args_NPU_DEPS}
XPU_DEPS ${args_XPU_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
Expand Down
2 changes: 1 addition & 1 deletion cmake/xpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ else()
endif()

find_library(XPU_SDK_LLVM_FILE NAMES LLVM-8
PATHS ${XPU_SDK_ROOT}/XTDK/shlib)
PATHS ${XPU_SDK_ROOT}/XTDK/shlib/gcc482)

if(NOT XPU_SDK_LLVM_FILE)
message(FATAL_ERROR "Can not find LLVM Library in ${XPU_SDK_ROOT}")
Expand Down
6 changes: 3 additions & 3 deletions lite/api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ else()
add_dependencies(paddle_light_api_shared op_list_h kernel_list_h)
if (LITE_WITH_NPU)
# Need to add HIAI runtime libs (libhiai.so) dependency
target_link_libraries(paddle_light_api_shared ${npu_runtime_libs})
target_link_libraries(paddle_light_api_shared ${npu_builder_libs} ${npu_runtime_libs})
endif()
endif()
endif()
Expand Down Expand Up @@ -78,8 +78,8 @@ if (NOT LITE_ON_TINY_PUBLISH)
DEPS ${cxx_api_deps} ${ops} ${host_kernels} program
X86_DEPS ${x86_kernels}
ARM_DEPS ${arm_kernels}
NPU_DEPS ${npu_kernels} ${npu_bridges} npu_pass
XPU_DEPS ${xpu_kernels} ${xpu_bridges} xpu_pass
NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels})
endif()
Expand Down
2 changes: 1 addition & 1 deletion lite/api/_paddle_use_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ USE_LITE_OP(while)
USE_LITE_OP(lod_reset)
USE_LITE_OP(lookup_table)
USE_LITE_OP(multiclass_nms)
USE_LITE_OP(graph_op)
USE_LITE_OP(subgraph)
USE_LITE_OP(sequence_expand)
USE_LITE_OP(sequence_pool)
USE_LITE_OP(reduce_max)
Expand Down
2 changes: 1 addition & 1 deletion lite/api/android/jni/native/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ else()
add_dependencies(paddle_lite_jni op_list_h kernel_list_h)
if (LITE_WITH_NPU)
# Need to add HIAI runtime libs (libhiai.so) dependency
target_link_libraries(paddle_lite_jni ${npu_runtime_libs})
target_link_libraries(paddle_lite_jni ${npu_builder_libs} ${npu_runtime_libs})
endif()
endif()

Expand Down
13 changes: 3 additions & 10 deletions lite/api/cxx_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -139,22 +139,15 @@ std::vector<std::string> Predictor::GetOutputNames() { return output_names_; }

// append the names of inputs and outputs into input_names_ and output_names_
void Predictor::PrepareFeedFetch() {
std::vector<const cpp::OpDesc *> feeds;
std::vector<const cpp::OpDesc *> fetchs;
#if defined(LITE_WITH_NPU) || defined(LITE_WITH_XPU)
// The shape of input tensors must be determined before generating NPU and XPU
// program.
auto current_block = program_desc_.GetBlock<cpp::BlockDesc>(0);
for (size_t i = 0; i < current_block->OpsSize(); i++) {
auto op = current_block->GetOp<cpp::OpDesc>(i);
#else
if (!program_) {
GenRuntimeProgram();
}

std::vector<const cpp::OpDesc *> feeds;
std::vector<const cpp::OpDesc *> fetchs;
const auto &insts = program_->instructions();
for (size_t i = 0; i < program_->num_instructions(); i++) {
const auto &op = insts[i].op()->op_info();
#endif
if (op->Type() == "feed") {
feeds.push_back(op);
} else if (op->Type() == "fetch") {
Expand Down
4 changes: 4 additions & 0 deletions lite/api/model_optimize_tool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ std::vector<Place> ParserValidPlaces() {
TARGET(kARM)); // enable kARM CPU kernel when no opencl kernel
} else if (target_repr == "x86") {
valid_places.emplace_back(TARGET(kX86));
} else if (target_repr == "npu") {
valid_places.emplace_back(TARGET(kNPU));
} else if (target_repr == "xpu") {
valid_places.emplace_back(TARGET(kXPU));
} else {
LOG(FATAL) << lite::string_format(
"Wrong target '%s' found, please check the command flag "
Expand Down
8 changes: 2 additions & 6 deletions lite/api/paddle_use_passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,6 @@ USE_MIR_PASS(static_kernel_pick_pass);
USE_MIR_PASS(variable_place_inference_pass);
USE_MIR_PASS(type_target_cast_pass);
USE_MIR_PASS(generate_program_pass);
#ifdef LITE_WITH_NPU
USE_MIR_PASS(generate_npu_program_pass);
#endif
#ifdef LITE_WITH_XPU
USE_MIR_PASS(generate_xpu_program_pass);
#endif

USE_MIR_PASS(io_copy_kernel_pick_pass);
USE_MIR_PASS(argument_type_display_pass);
Expand All @@ -45,3 +39,5 @@ USE_MIR_PASS(lite_quant_dequant_fuse_pass);
USE_MIR_PASS(type_precision_cast_pass);
USE_MIR_PASS(type_layout_cast_pass);
USE_MIR_PASS(memory_optimize_pass);
USE_MIR_PASS(npu_subgraph_pass);
USE_MIR_PASS(xpu_subgraph_pass);
3 changes: 1 addition & 2 deletions lite/backends/npu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ if(NOT LITE_WITH_NPU)
return()
endif()

lite_cc_library(npu_runtime SRCS runtime.cc DEPS ${npu_runtime_libs})
lite_cc_library(npu_builder SRCS builder.cc DEPS ${npu_builder_libs} npu_runtime tensor op scope)
lite_cc_library(device_npu SRCS device.cc DEPS ${npu_builder_libs} ${npu_runtime_libs})
69 changes: 69 additions & 0 deletions lite/backends/npu/device.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/backends/npu/device.h"
#include "lite/utils/cp_logging.h"

namespace paddle {
namespace lite {
namespace npu {

std::unique_ptr<hiai::AiModelMngerClient> Device::Build(
std::string& model_name, // NOLINT
std::vector<ge::Operator>& input_nodes, // NOLINT
std::vector<ge::Operator>& output_nodes // NOLINT
) {
VLOG(3) << "[NPU] Build model";
// Build the HiAI IR graph to the HiAI om model
ge::Graph ir_graph("graph");
ir_graph.SetInputs(input_nodes).SetOutputs(output_nodes);
ge::Model om_model("model", "model");
om_model.SetGraph(ir_graph);
domi::HiaiIrBuild ir_build;
domi::ModelBufferData om_model_buf;
if (!ir_build.CreateModelBuff(om_model, om_model_buf)) {
LOG(WARNING) << "[NPU] CreateModelBuff failed!";
return nullptr;
}
if (!ir_build.BuildIRModel(om_model, om_model_buf)) {
LOG(WARNING) << "[NPU] BuildIRModel failed!";
ir_build.ReleaseModelBuff(om_model_buf);
return nullptr;
}
// Create a HiAI model manager client to load the HiAI om model
std::unique_ptr<hiai::AiModelMngerClient> model_client(
new hiai::AiModelMngerClient());
if (model_client->Init(nullptr) != hiai::AI_SUCCESS) {
LOG(WARNING) << "[NPU] AiModelMngerClient init failed)!";
ir_build.ReleaseModelBuff(om_model_buf);
return nullptr;
}
model_name = "model_" + std::to_string(model_count_++) + ".om";
auto model_desc = std::make_shared<hiai::AiModelDescription>(
model_name, freq_level(), framework_type(), model_type(), device_type());
model_desc->SetModelBuffer(om_model_buf.data, om_model_buf.length);
std::vector<std::shared_ptr<hiai::AiModelDescription>> model_descs;
model_descs.push_back(model_desc);
if (model_client->Load(model_descs) != hiai::AI_SUCCESS) {
LOG(WARNING) << "[NPU] AiModelMngerClient load model failed!";
ir_build.ReleaseModelBuff(om_model_buf);
return nullptr;
}
ir_build.ReleaseModelBuff(om_model_buf);
return model_client;
}

} // namespace npu
} // namespace lite
} // namespace paddle
25 changes: 17 additions & 8 deletions lite/backends/npu/runtime.h → lite/backends/npu/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,38 +13,47 @@
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "ai_ddk_lib/include/HiAiModelManagerService.h"
#include "lite/core/tensor.h"
#include "ai_ddk_lib/include/hiai_ir_build.h"

namespace paddle {
namespace lite {
namespace npu {

class DeviceInfo {
class Device {
public:
static DeviceInfo &Global() {
static DeviceInfo x;
static Device& Global() {
static Device x;
return x;
}
DeviceInfo() {}
Device() {}

int freq_level() { return freq_level_; }
int framework_type() { return framework_type_; }
int model_type() { return model_type_; }
int device_type() { return device_type_; }

// Build the HiAI IR graph to om model, return HiAI model manager client to
// load om model and run inference.
std::unique_ptr<hiai::AiModelMngerClient> Build(
std::string& model_name, // NOLINT
std::vector<ge::Operator>& input_nodes, // NOLINT
std::vector<ge::Operator>& output_nodes // NOLINT
); // NOLINT

private:
int freq_level_{3};
int framework_type_{0};
int model_type_{0};
int device_type_{0};
int model_count_{0};
};

bool LoadModel(const lite::Tensor &model_data,
std::shared_ptr<hiai::AiModelMngerClient> *model_client,
std::string *model_name);
} // namespace npu
} // namespace lite
} // namespace paddle
Loading