Refactor ACL integration to support JSON runtime

* Now uses JSON runtime * Addresses tutorial comments * Rename acl to arm_compute_lib in user facing api Change-Id: I3b5ef80607f713e898363e82ab4398fbc2cf267a
apache · Jul 9, 2020 · 6d91877 · 6d91877
1 parent 2ea82e8
commit 6d91877
Show file tree

Hide file tree

Showing 28 changed files with 1,144 additions and 1,539 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -70,8 +70,8 @@ tvm_option(USE_CPP_RPC "Build CPP RPC" OFF)
 tvm_option(USE_TFLITE "Build with tflite support" OFF)
 tvm_option(USE_TENSORFLOW_PATH "TensorFlow root path when use TFLite" none)
 tvm_option(USE_COREML "Build with coreml support" OFF)
-tvm_option(USE_ACL "Build with Arm Compute Library" OFF)
-tvm_option(USE_ACL_GRAPH_RUNTIME "Build with Arm Compute Library graph runtime" OFF)
+tvm_option(USE_ARM_COMPUTE_LIB "Build with Arm Compute Library" OFF)
+tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "Build with Arm Compute Library graph runtime" OFF)
 
 if(USE_CPP_RPC AND UNIX)
   message(FATAL_ERROR "USE_CPP_RPC is only supported with WIN32. Use the Makefile for non-Windows.")
@@ -328,7 +328,7 @@ include(cmake/modules/contrib/HybridDump.cmake)
 include(cmake/modules/contrib/TFLite.cmake)
 include(cmake/modules/contrib/TF_TVMDSOOP.cmake)
 include(cmake/modules/contrib/CoreML.cmake)
-include(cmake/modules/contrib/ACL.cmake)
+include(cmake/modules/contrib/ArmComputeLib.cmake)
 
 include(CheckCXXCompilerFlag)
 if(NOT MSVC)

diff --git a/cmake/modules/contrib/ACL.cmake → cmake/modules/contrib/ArmComputeLib.cmake b/cmake/modules/contrib/ACL.cmake → cmake/modules/contrib/ArmComputeLib.cmake
@@ -19,23 +19,22 @@
 # for AArch. In the world where we take the cross compilation approach,
 # which is common with arm devices, we need to be able to cross-compile
 # a relay graph on x86 for AArch and then run the graph on AArch.
-if(USE_ACL)
-    file(GLOB ACL_RELAY_CONTRIB_SRC src/relay/backend/contrib/acl/*.cc)
-    file(GLOB ACL_RUNTIME_MODULE src/runtime/contrib/acl/acl_runtime.cc)
+if(USE_ARM_COMPUTE_LIB)
+    file(GLOB ACL_RELAY_CONTRIB_SRC src/relay/backend/contrib/arm_compute_lib/*.cc)
+    file(GLOB ACL_RUNTIME_MODULE src/runtime/contrib/arm_compute_lib/acl_runtime.cc)
     list(APPEND COMPILER_SRCS ${ACL_RELAY_CONTRIB_SRC})
     list(APPEND COMPILER_SRCS ${ACL_RUNTIME_MODULE})
-    message(STATUS "Build with ACL support...")
+    message(STATUS "Build with Arm Compute Library support...")
 endif()
 
-if(USE_ACL_GRAPH_RUNTIME)
+if(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME)
     set(ACL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/acl)
     # Detect custom ACL path.
-    if (NOT USE_ACL_GRAPH_RUNTIME STREQUAL "ON")
-        set(ACL_PATH ${USE_ACL_GRAPH_RUNTIME})
+    if (NOT USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME STREQUAL "ON")
+        set(ACL_PATH ${USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME})
     endif()
 
-    file(GLOB ACL_CONTRIB_SRC src/runtime/contrib/acl/*)
-    file(GLOB ACL_API src/relay/backend/contrib/acl/acl_api.cc)
+    file(GLOB ACL_CONTRIB_SRC src/runtime/contrib/arm_compute_lib/*)
 
     set(ACL_INCLUDE_DIRS ${ACL_PATH}/include ${ACL_PATH})
     include_directories(${ACL_INCLUDE_DIRS})
@@ -57,12 +56,11 @@ if(USE_ACL_GRAPH_RUNTIME)
     list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_CORE_LIB})
     list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_GRAPH_LIB})
     list(APPEND RUNTIME_SRCS ${ACL_CONTRIB_SRC})
-    list(APPEND RUNTIME_SRCS ${ACL_API})
-    message(STATUS "Build with ACL graph runtime support: "
+    message(STATUS "Build with Arm Compute Library graph runtime support: "
             ${EXTERN_ACL_COMPUTE_LIB} ", \n"
             ${EXTERN_ACL_COMPUTE_CORE_LIB} ", \n"
             ${EXTERN_ACL_COMPUTE_GRAPH_LIB})
 
     # Set flag to detect ACL graph runtime support.
-    add_definitions(-DTVM_GRAPH_RUNTIME_ACL)
+    add_definitions(-DTVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB)
 endif()
diff --git a/src/relay/backend/contrib/acl/README.md → docs/deploy/arm_compute_lib.rst b/src/relay/backend/contrib/acl/README.md → docs/deploy/arm_compute_lib.rst
@@ -1,48 +1,58 @@
-<!---
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-
-# Relay Arm&reg; Compute Library Integration
+..  Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+..    http://www.apache.org/licenses/LICENSE-2.0
+
+..  Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+Relay Arm|reg| Compute Library Integration
+==========================================
+
+Introduction
+------------
+
 Arm Compute Library (ACL) is an open source project that provides accelerated kernels for Arm CPUs
 and GPUs. Currently the integration offloads operators to ACL to use hand-crafted assembler
 routines in the library. By offloading select operators from a relay graph to ACL we can achieve
 a performance boost on such devices.
 
-## Building with ACL support
+Building with ACL support
+-------------------------
 The current implementation has two separate build options in cmake. The reason for this split is
 because ACL cannot be used on an x86 machine. However, we still want to be able to compile an ACL
 runtime module on an x86 machine.
 
-* USE_ACL=ON/OFF - Enabling this flag will add support for compiling an ACL runtime module.
-* USE_GRAPH_RUNTIME_ACL=ON/OFF/path-to-acl - Enabling this flag will allow the graph runtime to
-compute the ACL offloaded functions.
+* USE_ARM_COMPUTE_LIB=ON/OFF - Enabling this flag will add support for compiling an ACL runtime module.
+* USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON/OFF/path-to-acl - Enabling this flag will allow the graph runtime to
+  compute the ACL offloaded functions.
 
 These flags can be used in different scenarios depending on your setup. For example, if you want
 to compile ACL on an x86 machine and then run the module on a remote Arm device via RPC, you will
 need to use USE_ARM_COMPUTE_LIB=ON on the x86 machine and USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON on the remote AArch64
 device.
-## Usage
-_Note:_ this may not stay up-to-date with changes to the API.
-1. Create a relay graph. This may be a single operator or a whole graph. The intention is that any
+
+Usage
+-----
+
+*Note:* this section may not stay up-to-date with changes to the API.
+
+Create a relay graph. This may be a single operator or a whole graph. The intention is that any
 relay graph can be input. The ACL integration will only pick supported operators to be offloaded
 whilst the rest will be computed via TVM. (For this example we will use a single
 max_pool2d operator).
-    ```
+
+.. code:: python
+
     import tvm
     from tvm import relay
 
@@ -55,57 +65,70 @@ max_pool2d operator).
     output_shape = (1, 7, 7, 512)
 
     data = relay.var('data', shape=data_shape, dtype=data_type)
-    out = relay.nn.max_pool2d(data, pool_size=pool_size, strides=strides,
-                              layout=layout, padding=padding)
+    out = relay.nn.max_pool2d(data, pool_size=pool_size, strides=strides, layout=layout, padding=padding)
     module = tvm.IRModule.from_expr(out)
-    ```
-2. Annotate and partition the graph for ACL.
-    ```
-    module = relay.transform.AnnotateTarget("acl")(module)
-    module = relay.transform.PartitionGraph()(module)
-    ```
-3. Build the Relay graph.
-    ```
-    target = "llvm -target=aarch64-linux-gnu -mattr=+neon"
-    with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
-            json, lib, params = relay.build(module, target=target)
-    ```
-4. Export the module.
-    ```
+
+
+Annotate and partition the graph for ACL.
+
+.. code:: python
+
+    from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib
+    partition_for_arm_compute_lib(module)
+
+
+Build the Relay graph.
+
+.. code:: python
+
+    target = "llvm -mtriple=aarch64-linux-gnu -mattr=+neon"
+    with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
+        json, lib, params = relay.build(module, target=target)
+
+
+Export the module.
+
+.. code:: python
+
     lib_path = '~/lib_acl.so'
     cross_compile = 'aarch64-linux-gnu-c++'
     lib.export_library(lib_path, cc=cross_compile)
-    ```
- 5. Run Inference. This must be on an Arm device. If compiling on x86 device and running on aarch64
- consider using the RPC mechanism.
-    ```
+
+
+Run Inference. This must be on an Arm device. If compiling on an x86 device and running on AArch64,
+consider using the RPC mechanism.
+
+.. code:: python
+
     tvm.runtime.load_module('lib_acl.so')
     gen_module = tvm.contrib.graph_runtime.create(json, lib, ctx)
-
     d_data = np.random.uniform(0, 1, data_shape).astype(data_type)
     map_inputs = {'data': d_data}
     gen_module.map_inputs(**map_inputs)
     gen_module.run()
-    ```
 
-## More examples
+
+More examples
+-------------
 The example above only shows a basic example of how ACL can be used for offloading a single
 Maxpool2D. If you would like to see more examples for each implemented operator and for
-networks refer to the tests: `tests/python/contrib/test_acl`. Here you can modify
+networks refer to the tests: `tests/python/contrib/test_arm_compute_lib`. Here you can modify
 `infrastructure.py` to use the remote device you have set up.
 
-## Adding a new operator
+
+Adding a new operator
+---------------------
 Adding a new operator requires changes to a series of places. This section will give a hint on
 what needs to be changed and where; it will not, however, dive into the complexities for an
 individual operator. This is left to the developer.
 
 There are a series of files we need to make changes to:
-* `python/relay/op/contrib/acl.py` In this file we define the operators we wish to offload using the
+* `python/relay/op/contrib/arm_compute_lib.py` In this file we define the operators we wish to offload using the
 `op.register` decorator. This will mean the annotation pass recognizes this operator as ACL
 offloadable.
-* `src/relay/backend/contrib/acl/codegen_acl.h` Implement `Make[OpName]` method. This is where we
+* `src/relay/backend/contrib/arm_compute_lib/codegen_acl.h` Implement `Make[OpName]` method. This is where we
 declare how the operator should be represented by JSON. This will be used to create the ACL module.
-* `src/runtime/contrib/acl/acl_kernel.h` Implement `Create[OpName]Layer` method. This is where we
+* `src/runtime/contrib/arm_compute_lib/acl_kernel.h` Implement `Create[OpName]Layer` method. This is where we
 define how the JSON representation can be used to create an ACL function. We simply define how to
 translate from the JSON representation to ACL API.
-* `tests/python/contrib/test_acl` Add unit tests for the given operator.
+* `tests/python/contrib/test_arm_compute_lib` Add unit tests for the given operator.
diff --git a/python/tvm/relay/op/contrib/__init__.py b/python/tvm/relay/op/contrib/__init__.py
@@ -18,6 +18,6 @@
 """Contrib modules."""
 from .register import get_pattern_table, register_pattern_table
 
-from .acl import *
+from .arm_compute_lib import *
 from .dnnl import *
 from .coreml import *
diff --git a/python/tvm/relay/op/contrib/acl.py → ...n/tvm/relay/op/contrib/arm_compute_lib.py b/python/tvm/relay/op/contrib/acl.py → ...n/tvm/relay/op/contrib/arm_compute_lib.py
@@ -24,20 +24,20 @@
 from .register import register_pattern_table
 
 
-def is_acl_runtime_present():
+def is_arm_compute_runtime_present():
     """Check if the ACL graph runtime is present.
 
     Returns
     -------
     ret: bool
         True if present, False if not.
     """
-    return tvm.get_global_func("relay.op.is_acl_runtime_enabled", True)
+    return tvm.get_global_func("relay.op.is_arm_compute_runtime_enabled", True)
 
 
-def partition_for_acl(mod, params=None):
+def partition_for_arm_compute_lib(mod, params=None):
     """Partition the graph greedily offloading supported
-    operators to ACL.
+    operators to Arm Compute Library.
 
     Parameters
     ----------
@@ -54,13 +54,13 @@ def partition_for_acl(mod, params=None):
         mod['main'] = bind_params_by_name(mod['main'], params)
 
     seq = tvm.transform.Sequential([transform.MergeComposite(pattern_table()),
-                                    transform.AnnotateTarget('acl'),
+                                    transform.AnnotateTarget('arm_compute_lib'),
                                     transform.PartitionGraph()])
 
     return seq(mod)
 
 
-@register_pattern_table("acl")
+@register_pattern_table("arm_compute_lib")
 def pattern_table():
     """Get the ACL pattern table."""
 
@@ -85,11 +85,11 @@ def check_conv(extract):
             call = call.args[0]
         return conv2d(call.attrs, call.args)
 
-    return [('acl.conv2d', conv_pattern(), check_conv)]
+    return [('arm_compute_lib.conv2d', conv_pattern(), check_conv)]
 
 
 def _register_external_op_helper(op_name, supported=True):
-    @tvm.ir.register_op_attr(op_name, "target.acl")
+    @tvm.ir.register_op_attr(op_name, "target.arm_compute_lib")
     def _func_wrapper(attrs, args):
         return supported
 
@@ -99,26 +99,20 @@ def _func_wrapper(attrs, args):
 _register_external_op_helper("reshape")
 
 
-@tvm.ir.register_op_attr("nn.conv2d", "target.acl")
+@tvm.ir.register_op_attr("nn.conv2d", "target.arm_compute_lib")
 def conv2d(attrs, args):
     """Check if the external ACL codegen for conv2d should be used."""
-
-    # ACL only supports group size of 1
     if attrs.groups != 1:
         return False
-
-    # ACL only supports NHWC layout
     if attrs.data_layout != "NHWC":
         return False
 
     return True
 
 
-@tvm.ir.register_op_attr("nn.max_pool2d", "target.acl")
+@tvm.ir.register_op_attr("nn.max_pool2d", "target.arm_compute_lib")
 def max_pool2d(attrs, args):
     """Check if the external ACL codegen for maxpool2d should be used."""
-
-    # ACL only supports NHWC layout
     if attrs.layout != "NHWC":
         return False