Skip to content

Commit

Permalink
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle-Lite
Browse files Browse the repository at this point in the history
…into mul_quant; test=develop
  • Loading branch information
newway committed Sep 26, 2021
2 parents a827668 + ae305a6 commit fac0af2
Show file tree
Hide file tree
Showing 579 changed files with 17,509 additions and 9,881 deletions.
22 changes: 11 additions & 11 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

include(system)
include(functions)
include(cross_compiling/preproject)
include(os/common)

project(paddle CXX C)
message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
Expand Down Expand Up @@ -194,15 +194,15 @@ if (LITE_WITH_PYTHON)
endif()

if(LITE_WITH_RKNPU)
include(device/rknpu)
include(backends/rknpu)
endif()

if(LITE_WITH_IMAGINATION_NNA)
include(device/imagination_nna)
include(backends/imagination_nna)
endif()

if(LITE_WITH_INTEL_FPGA)
include(device/intel_fpga)
include(backends/intel_fpga)
endif()

# flatbuffer module for loading model
Expand Down Expand Up @@ -234,11 +234,11 @@ endif()
# for mobile
if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
message(STATUS "Building the mobile framework")
include(cross_compiling/postproject)
include(device/npu) # check and prepare NPU DDK
include(device/xpu) # check and prepare XPU SDK
include(device/apu) # check and prepare APU SDK
include(device/huawei_ascend_npu) # check and prepare Ascend NPU SDK
include(os/postproject)
include(backends/npu) # check and prepare NPU DDK
include(backends/xpu) # check and prepare XPU SDK
include(backends/apu) # check and prepare APU SDK
include(backends/huawei_ascend_npu) # check and prepare Ascend NPU SDK

# We compile the mobile deployment library when LITE_ON_TINY_PUBLISH=ON
# So the following third party dependencies are not needed.
Expand Down Expand Up @@ -273,15 +273,15 @@ endif()
########################################################################################

if(LITE_WITH_XPU)
include(device/xpu)
include(backends/xpu)
endif()

if(LITE_WITH_MLU)
include(mlu)
endif()

if(LITE_WITH_HUAWEI_ASCEND_NPU)
include(device/huawei_ascend_npu)
include(backends/huawei_ascend_npu)
endif()

include(coveralls)
Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle-Lite.svg?branch=develop&longCache=true&style=flat-square)](https://travis-ci.org/PaddlePaddle/Paddle-Lite) [![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](https://paddle-lite.readthedocs.io/zh/develop/) [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle-Lite.svg)](https://github.com/PaddlePaddle/Paddle-Lite/releases) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)

Paddle Lite是一个高性能、轻量级、灵活性强且易于扩展的深度学习推理框架,定位支持包括移动端、嵌入式以及服务器端在内的多硬件平台。
Paddle Lite是一个高性能、轻量级、灵活性强且易于扩展的深度学习推理框架,定位于支持包括移动端、嵌入式以及服务器端在内的多硬件平台。

当前Paddle Lite不仅在百度内部业务中得到全面应用,也成功支持了众多外部用户和企业的生产任务。

Expand Down Expand Up @@ -55,7 +55,7 @@ Paddle Lite提供了C++、Java、Python三种API,并且提供了相应API的
## 主要特性

- **多硬件支持:**
- Paddle Lite架构已经验证和完整支持从 Mobile 到 Server [多种硬件平台](https://paddle-lite.readthedocs.io/zh/latest/introduction/support_hardware.html),包括 ARM CPU、Mali GPU、Adreno GPU、华为 NPU,以及 FPGA 等,且正在不断增加更多新硬件支持。
- Paddle Lite架构已经验证和完整支持从 Mobile 到 Server [多种硬件平台](https://paddle-lite.readthedocs.io/zh/latest/introduction/support_hardware.html),包括 ARM CPU、Mali GPU、Adreno GPU、英伟达 GPU、苹果 GPU、华为 NPU,以及 FPGA 等,且正在不断增加更多新硬件支持。
- 各个硬件平台的 Kernel 在代码层和执行层互不干扰,用户不仅可以自由插拔任何硬件,还支持任意系统可见硬件之间的[混合调度](https://paddle-lite.readthedocs.io/zh/latest/introduction/tech_highlights.html#id7)。
- **轻量级部署**
- Paddle Lite在设计上对图优化模块和执行引擎实现了良好的解耦拆分,移动端可以直接部署执行阶段,无任何第三方依赖。
Expand All @@ -67,7 +67,7 @@ Paddle Lite提供了C++、Java、Python三种API,并且提供了相应API的
- Paddle Lite和PaddlePaddle训练框架的OP对齐,提供广泛的模型支持能力。
- 目前已严格验证24个模型200个OP的精度和性能,对视觉类模型做到了较为充分的支持,覆盖分类、检测和定位,包含了特色的OCR模型的支持,并在不断丰富中。具体请参考[支持OP](https://paddle-lite.readthedocs.io/zh/latest/introduction/support_operation_list.html)。
- **强大的图分析和优化能力**
- 不同于常规的移动端预测引擎基于 Python 脚本工具转化模型, Lite 架构上有完整基于 C++ 开发的 IR 及相应 Pass 集合,以支持操作熔合,计算剪枝,存储优化,量化计算等多类计算图优化。更多的优化策略可以简单通过 [新增 Pass](https://paddle-lite.readthedocs.io/zh/latest/develop_guides/add_new_pass.html) 的方式模块化支持。
- 不同于常规的移动端预测引擎基于 Python 脚本工具转化模型, Lite 架构上有完整基于 C++ 开发的 IR 及相应 Pass 集合,以支持操作融合,计算剪枝,存储优化,量化计算等多类计算图优化。更多的优化策略可以简单通过 [新增 Pass](https://paddle-lite.readthedocs.io/zh/latest/develop_guides/add_new_pass.html) 的方式模块化支持。

## 持续集成

Expand All @@ -76,6 +76,7 @@ Paddle Lite提供了C++、Java、Python三种API,并且提供了相应API的
| CPU(32bit) | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) |
| CPU(64bit) | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) |
| OpenCL | - | - | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) | - |
| Metal | - | - | - | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) |
| FPGA | - | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) | - | - |
| 华为NPU | - | - | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) | - |
| 百度 XPU | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) | ![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg) | - | - |
Expand Down
10 changes: 4 additions & 6 deletions README_en.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ The latest benchmark is located at [benchmark](https://paddlepaddle.github.io/Pa

### High Compatibility

Hardware compatibility: Paddle Lite supports a diversity of hardwares — ARM CPU, Mali GPU, Adreno GPU, Huawei NPU and FPGA. In the near future, we will also support AI microchips from Cambricon and Bitmain.
Hardware compatibility: Paddle Lite supports a wide variety of hardware — ARM CPU, Mali GPU, Adreno GPU, Nvidia GPU, Apple GPU, Huawei NPU and FPGA. In the near future, we will also support AI microchips from Cambricon and Bitmain.

Model compatibility: The operators of Paddle Lite are fully compatible with those of PaddlePaddle. The accuracy and performance of 18 models (mostly CV models and OCR models) and 85 operators have been validated. In the future, we will also support other models.

Expand All @@ -43,15 +43,13 @@ Paddle Lite is designed to support a wide range of hardwares and devices, and it

![img](https://user-images.githubusercontent.com/45189361/70908123-6ce4fd00-2045-11ea-97e1-ad08446c5c86.png)

As is shown in the figure above, analysis phase includes Machine IR module, and it enables optimizations like Op fusion and redundant computation pruning. Besides, excecution phase only involves Kernal exevution, so it can be deployed on its own to ensure maximized light-weighted deployment.
As shown in the figure above, the analysis phase includes the Machine IR module, which enables optimizations such as Op fusion and redundant computation pruning. In addition, the execution phase only involves Kernel execution, so it can be deployed on its own to keep the deployment as lightweight as possible.

## Key Info about the Update

The earlier Paddle-Mobile was designed to be compatible with PaddlePaddle and multiple hardwares, including ARM CPU, Mali GPU, Adreno GPU, FPGA, ARM-Linux and Apple's GPU Metal. Within Baidu, inc, many product lines have been using Paddle-Mobile. For more details, please see: [mobile/README](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/README.md).
The earlier Paddle-Mobile was designed to be compatible with PaddlePaddle and multiple hardware platforms, including ARM CPU, Mali GPU, Adreno GPU, FPGA, ARM-Linux and Apple's GPU Metal. Within Baidu, Inc., many product lines have been using Paddle-Mobile.

As an update of Paddle-Mobile, Paddle Lite has incorporated many older capabilities into the [new architecture](https://github.com/PaddlePaddle/Paddle-Lite/tree/develop/lite). For the time being, the code of Paddle-mobile will be kept under the directory `mobile/`, before complete transfer to Paddle Lite.

For demands of Apple's GPU Metal and web front end inference, please see `./metal` and `./web` . These two modules will be further developed and maintained.
As an update of Paddle-Mobile, Paddle Lite has incorporated many older capabilities into the [new architecture](https://github.com/PaddlePaddle/Paddle-Lite/tree/develop/lite).

## Special Thanks

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
25 changes: 22 additions & 3 deletions cmake/functions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,14 @@ function(add_kernel TARGET device level)
get_filename_component(filename ${src} NAME_WE) # conv_compute.cc => conv_compute
set(kernel_tailor_src_dir "${CMAKE_BINARY_DIR}/kernel_tailor_src_dir")
set(suffix "for_strip")
set(dst_file ${dst_file} "${kernel_tailor_src_dir}/${filename}_${device_name}_${suffix}.cc") # conv_compute_arm.cc
set(src_file "${kernel_tailor_src_dir}/${filename}_${device_name}_${suffix}.cc") # conv_compute_arm.cc
if("${device}" STREQUAL "METAL")
set(dst_file ${dst_file} "${kernel_tailor_src_dir}/${filename}_${device_name}_${suffix}.mm") # conv_compute_apple_metal_for_strip.mm
set(src_file "${kernel_tailor_src_dir}/${filename}_${device_name}_${suffix}.mm") # conv_compute_apple_metal_for_strip.mm
endif()
if(NOT EXISTS ${dst_file})
if(NOT EXISTS ${src_file})
return()
endif()
set(dst_file ${dst_file} "${src_file}")
endforeach()
file(APPEND ${kernels_src_list} "${dst_file}\n")
set(KERNELS_SRC ${KERNELS_SRC} "${dst_file}" CACHE INTERNAL "kernels source")
Expand Down Expand Up @@ -196,6 +197,24 @@ function(lite_cc_test TARGET)
add_dependencies(lite_compile_deps ${TARGET})
endif()

# link to dynamic runtime lib
if(LITE_WITH_RKNPU)
target_link_libraries(${TARGET} ${rknpu_runtime_libs})
endif()
if(LITE_WITH_IMAGINATION_NNA)
target_link_libraries(${TARGET} ${imagination_nna_builder_libs} ${imagination_nna_runtime_libs})
endif()
if(LITE_WITH_HUAWEI_ASCEND_NPU)
target_link_libraries(${TARGET} ${huawei_ascend_npu_runtime_libs} ${huawei_ascend_npu_builder_libs})
endif()
if(LITE_WITH_NPU)
target_link_libraries(${TARGET} ${npu_builder_libs} ${npu_runtime_libs})
endif()
if(LITE_WITH_CUDA)
get_property(cuda_deps GLOBAL PROPERTY CUDA_MODULES)
target_link_libraries(${TARGET} ${cuda_deps})
endif()

common_link(${TARGET})
add_test(NAME ${TARGET}
COMMAND ${TARGET} ${args_ARGS}
Expand Down
51 changes: 29 additions & 22 deletions cmake/lite.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -261,12 +261,34 @@ function(lite_cc_binary TARGET)
# link to paddle-lite static lib automatically
add_dependencies(${TARGET} bundle_full_api)



if(NOT WIN32)
target_link_libraries(${TARGET} ${CMAKE_BINARY_DIR}/libpaddle_api_full_bundled.a)
target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
else()
target_link_libraries(${TARGET} ${CMAKE_BINARY_DIR}/lite/api/${CMAKE_BUILD_TYPE}/libpaddle_api_full_bundled.lib)
endif()


# link to dynamic runtime lib
if(LITE_WITH_RKNPU)
target_link_libraries(${TARGET} ${rknpu_runtime_libs})
endif()
if(LITE_WITH_IMAGINATION_NNA)
target_link_libraries(${TARGET} ${imagination_nna_builder_libs} ${imagination_nna_runtime_libs})
endif()
if(LITE_WITH_HUAWEI_ASCEND_NPU)
target_link_libraries(${TARGET} ${huawei_ascend_npu_runtime_libs} ${huawei_ascend_npu_builder_libs})
endif()
if(LITE_WITH_NPU)
target_link_libraries(${TARGET} ${npu_builder_libs} ${npu_runtime_libs})
endif()
if(LITE_WITH_CUDA)
get_property(cuda_deps GLOBAL PROPERTY CUDA_MODULES)
target_link_libraries(${TARGET} ${cuda_deps})
endif()

if (NOT APPLE AND NOT WIN32)
# strip binary target to reduce size
if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
Expand All @@ -282,23 +304,6 @@ function(lite_cc_binary TARGET)
endif()
endfunction()


# file to record subgraph bridges for new hardware
set(subgraph_bridges_src_list "${CMAKE_BINARY_DIR}/subgraph_bridges_src_list.txt")
file(WRITE ${subgraph_bridges_src_list} "") # clean

# Register the source files of a subgraph bridge for a hardware backend that
# runs some ops through subgraphs (e.g. npu, rknpu, apu, huawei_ascend_npu,
# imagination_nna, nnadapter).
#
# Usage:
#   add_subgraph_bridge(SRCS <src>...)
#
# Each SRCS entry is prefixed with CMAKE_CURRENT_SOURCE_DIR and appended, one
# path per line, to the file named by ${subgraph_bridges_src_list}.
function(add_subgraph_bridge)
  # Only the multi-value keyword SRCS is recognised; there are no flag or
  # single-value keywords.
  cmake_parse_arguments(args "" "" "SRCS" ${ARGN})
  foreach(bridge_src ${args_SRCS})
    file(APPEND ${subgraph_bridges_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${bridge_src}\n")
  endforeach()
endfunction()

#only for windows
function(create_static_lib TARGET_NAME)
set(libs ${ARGN})
Expand Down Expand Up @@ -382,6 +387,9 @@ function(bundle_static_library tgt_name bundled_tgt_name fake_target)
return()
endif()

add_custom_target(${fake_target})
add_dependencies(${fake_target} ${tgt_name})

if(NOT IOS AND NOT APPLE)
file(WRITE ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar.in
"CREATE ${bundled_tgt_full_name}\n" )
Expand All @@ -404,8 +412,9 @@ function(bundle_static_library tgt_name bundled_tgt_name fake_target)
endif()

add_custom_command(
TARGET ${fake_target} PRE_BUILD
COMMAND rm -f ${bundled_tgt_full_name}
COMMAND ${ar_tool} -M < ${CMAKE_BINARY_DIR}/${bundled_tgt_name}.ar
OUTPUT ${bundled_tgt_full_name}
COMMENT "Bundling ${bundled_tgt_name}"
DEPENDS ${tgt_name}
VERBATIM)
Expand All @@ -414,15 +423,13 @@ function(bundle_static_library tgt_name bundled_tgt_name fake_target)
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach()
add_custom_command(
TARGET ${fake_target} PRE_BUILD
COMMAND rm -f ${bundled_tgt_full_name}
COMMAND /usr/bin/libtool -static -o ${bundled_tgt_full_name} ${libfiles}
DEPENDS ${tgt_name}
OUTPUT ${bundled_tgt_full_name}
)
endif()

add_custom_target(${fake_target} ALL DEPENDS ${bundled_tgt_full_name})
add_dependencies(${fake_target} ${tgt_name})

add_library(${bundled_tgt_name} STATIC IMPORTED)
set_target_properties(${bundled_tgt_name}
PROPERTIES
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
10 changes: 5 additions & 5 deletions cmake/cross_compiling/preproject.cmake → cmake/os/common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,18 @@ message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
message(STATUS "CMAKE_CXX_FLAGS_RELEASE: ${CMAKE_CXX_FLAGS_RELEASE}")

if(ARM_TARGET_OS STREQUAL "android")
include(cross_compiling/android)
include(os/android)
endif()
if(ARM_TARGET_OS STREQUAL "armlinux")
include(cross_compiling/armlinux)
include(os/armlinux)
endif()
if(ARM_TARGET_OS STREQUAL "ios" OR ARM_TARGET_OS STREQUAL "ios64")
include(cross_compiling/ios)
include(os/ios)
endif()
if(ARM_TARGET_OS STREQUAL "armmacos")
include(cross_compiling/armmacos)
include(os/armmacos)
endif()
include(cross_compiling/host)
include(os/host)

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Default use Release in android" FORCE)
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
endif()
include(CheckCXXCompilerFlag)
if(ANDROID)
include(cross_compiling/findar)
include(os/findar)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -llog -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -llog -fPIC")
if(LITE_WITH_ARM82_FP16)
Expand Down
26 changes: 26 additions & 0 deletions docs/api_reference/cxx_api_doc.md
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,32 @@ std::shared_ptr<PaddlePredictor> predictor = CreatePaddlePredictor<MobileConfig>

返回类型:`int`



### `set_metal_lib_path(path)`

用于iOS设备上使用Metal进行GPU预测时,配置metallib加载路径。

参数:

- `path(str)` - metallib库文件路径

返回类型:`void`



### `set_metal_use_mps(flag)`

设置iOS设备上使用Metal进行GPU预测时,是否启用[Metal Performance Shaders](https://developer.apple.com/documentation/metalperformanceshaders)。若不设置,默认不使用(建议启用)。

参数:

- `flag(bool)` - 是否使用MPS

返回:是否使用Metal Performance Shaders

返回类型:`bool`

## PaddlePredictor

```c++
Expand Down
Loading

0 comments on commit fac0af2

Please sign in to comment.