diff --git a/CMakeLists.txt b/CMakeLists.txt index bdb38e3fb8058..7273238109fd3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,8 +70,8 @@ tvm_option(USE_CPP_RPC "Build CPP RPC" OFF) tvm_option(USE_TFLITE "Build with tflite support" OFF) tvm_option(USE_TENSORFLOW_PATH "TensorFlow root path when use TFLite" none) tvm_option(USE_COREML "Build with coreml support" OFF) -tvm_option(USE_ACL "Build with Arm Compute Library" OFF) -tvm_option(USE_ACL_GRAPH_RUNTIME "Build with Arm Compute Library graph runtime" OFF) +tvm_option(USE_ARM_COMPUTE_LIB "Build with Arm Compute Library" OFF) +tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "Build with Arm Compute Library graph runtime" OFF) if(USE_CPP_RPC AND UNIX) message(FATAL_ERROR "USE_CPP_RPC is only supported with WIN32. Use the Makefile for non-Windows.") @@ -329,7 +329,7 @@ include(cmake/modules/contrib/HybridDump.cmake) include(cmake/modules/contrib/TFLite.cmake) include(cmake/modules/contrib/TF_TVMDSOOP.cmake) include(cmake/modules/contrib/CoreML.cmake) -include(cmake/modules/contrib/ACL.cmake) +include(cmake/modules/contrib/ArmComputeLib.cmake) include(CheckCXXCompilerFlag) if(NOT MSVC) diff --git a/cmake/config.cmake b/cmake/config.cmake index e59690da2c04e..812e2f714227e 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -184,17 +184,17 @@ set(USE_SORT ON) # Whether use MKL-DNN (DNNL) codegen set(USE_DNNL_CODEGEN OFF) -# Whether to use ACL (Arm Compute Library) codegen +# Whether to use Arm Compute Library (ACL) codegen # We provide 2 separate flags since we cannot build the ACL runtime on x86. # This is useful for cases where you want to cross-compile a relay graph # on x86 then run on AArch. # -# USE_ACL - Support for compiling a relay graph offloading supported -# operators to ACL. OFF/ON -# USE_ACL_GRAPH_RUNTIME - Run ACL annotated functions via the ACL -# runtime. OFF/ON/"path/to/ACL" -set(USE_ACL OFF) -set(USE_ACL_GRAPH_RUNTIME OFF) +# USE_ARM_COMPUTE_LIB - Support for compiling a relay graph offloading supported +# operators to Arm Compute Library. OFF/ON +# USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME - Run Arm Compute Library annotated functions via the ACL +# runtime. OFF/ON/"path/to/ACL" +set(USE_ARM_COMPUTE_LIB OFF) +set(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME OFF) # Build ANTLR parser for Relay text format # Possible values: diff --git a/cmake/modules/contrib/ACL.cmake b/cmake/modules/contrib/ArmComputeLib.cmake similarity index 79% rename from cmake/modules/contrib/ACL.cmake rename to cmake/modules/contrib/ArmComputeLib.cmake index 94db11d1fdf05..ff9c8f7470131 100644 --- a/cmake/modules/contrib/ACL.cmake +++ b/cmake/modules/contrib/ArmComputeLib.cmake @@ -19,23 +19,22 @@ # for AArch. In the world where we take the cross compilation approach, # which is common with arm devices, we need to be able to cross-compile # a relay graph on x86 for AArch and then run the graph on AArch. 
-if(USE_ACL) - file(GLOB ACL_RELAY_CONTRIB_SRC src/relay/backend/contrib/acl/*.cc) - file(GLOB ACL_RUNTIME_MODULE src/runtime/contrib/acl/acl_runtime.cc) +if(USE_ARM_COMPUTE_LIB) + file(GLOB ACL_RELAY_CONTRIB_SRC src/relay/backend/contrib/arm_compute_lib/*.cc) + file(GLOB ACL_RUNTIME_MODULE src/runtime/contrib/arm_compute_lib/acl_runtime.cc) list(APPEND COMPILER_SRCS ${ACL_RELAY_CONTRIB_SRC}) list(APPEND COMPILER_SRCS ${ACL_RUNTIME_MODULE}) - message(STATUS "Build with ACL support...") + message(STATUS "Build with Arm Compute Library support...") endif() -if(USE_ACL_GRAPH_RUNTIME) +if(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME) set(ACL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/acl) # Detect custom ACL path. - if (NOT USE_ACL_GRAPH_RUNTIME STREQUAL "ON") - set(ACL_PATH ${USE_ACL_GRAPH_RUNTIME}) + if (NOT USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME STREQUAL "ON") + set(ACL_PATH ${USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME}) endif() - file(GLOB ACL_CONTRIB_SRC src/runtime/contrib/acl/*) - file(GLOB ACL_API src/relay/backend/contrib/acl/acl_api.cc) + file(GLOB ACL_CONTRIB_SRC src/runtime/contrib/arm_compute_lib/*) set(ACL_INCLUDE_DIRS ${ACL_PATH}/include ${ACL_PATH}) include_directories(${ACL_INCLUDE_DIRS}) @@ -57,12 +56,11 @@ if(USE_ACL_GRAPH_RUNTIME) list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_CORE_LIB}) list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_GRAPH_LIB}) list(APPEND RUNTIME_SRCS ${ACL_CONTRIB_SRC}) - list(APPEND RUNTIME_SRCS ${ACL_API}) - message(STATUS "Build with ACL graph runtime support: " + message(STATUS "Build with Arm Compute Library graph runtime support: " ${EXTERN_ACL_COMPUTE_LIB} ", \n" ${EXTERN_ACL_COMPUTE_CORE_LIB} ", \n" ${EXTERN_ACL_COMPUTE_GRAPH_LIB}) # Set flag to detect ACL graph runtime support. - add_definitions(-DTVM_GRAPH_RUNTIME_ACL) + add_definitions(-DTVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB) endif() diff --git a/src/relay/backend/contrib/acl/README.md b/docs/deploy/arm_compute_lib.rst similarity index 50% rename from src/relay/backend/contrib/acl/README.md rename to docs/deploy/arm_compute_lib.rst index 111f64c2c1f28..76e1c6ad46512 100644 --- a/src/relay/backend/contrib/acl/README.md +++ b/docs/deploy/arm_compute_lib.rst @@ -1,48 +1,58 @@ - - -# Relay Arm® Compute Library Integration +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Relay Arm|reg| Compute Library Integration +========================================== + +Introduction +------------ + Arm Compute Library (ACL) is an open source project that provides accelerated kernels for Arm CPU's and GPU's. Currently the integration offloads operators to ACL to use hand-crafted assembler routines in the library. By offloading select operators from a relay graph to ACL we can achieve a performance boost on such devices. 
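Whether the codegen and graph runtime described in the following sections are available in a given TVM build can be checked from Python. This is a minimal, hedged sketch; it relies only on the ``is_arm_compute_runtime_present`` helper added by this patch, which returns a truthy value when the ``relay.op.is_arm_compute_runtime_enabled`` global is registered:

.. code:: python

    from tvm.relay.op.contrib.arm_compute_lib import is_arm_compute_runtime_present

    # Truthy only when the Arm Compute Library integration was compiled into this build.
    if not is_arm_compute_runtime_present():
        print("TVM was built without Arm Compute Library support")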
-## Building with ACL support +Building with ACL support +------------------------- The current implementation has two separate build options in cmake. The reason for this split is that ACL cannot be used on an x86 machine. However, we still want to be able to compile an ACL runtime module on an x86 machine. -* USE_ACL=ON/OFF - Enabling this flag will add support for compiling an ACL runtime module. -* USE_GRAPH_RUNTIME_ACL=ON/OFF/path-to-acl - Enabling this flag will allow the graph runtime to -compute the ACL offloaded functions. +* USE_ARM_COMPUTE_LIB=ON/OFF - Enabling this flag will add support for compiling an ACL runtime module. +* USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON/OFF/path-to-acl - Enabling this flag will allow the graph runtime to + compute the ACL offloaded functions. These flags can be used in different scenarios depending on your setup. For example, if you want to compile ACL on an x86 machine and then run the module on a remote Arm device via RPC, you will need to use USE_ARM_COMPUTE_LIB=ON on the x86 machine and USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON on the remote AArch64 device. -## Usage -_Note:_ this may not stay up-to-date with changes to the API. -1. Create a relay graph. This may be a single operator or a whole graph. The intention is that any + +Usage +----- + +*Note:* this section may not stay up-to-date with changes to the API. + +Create a relay graph. This may be a single operator or a whole graph. The intention is that any relay graph can be input. The ACL integration will only pick supported operators to be offloaded whilst the rest will be computed via TVM. (For this example we will use a single max_pool2d operator). - ``` + +.. code:: python + import tvm from tvm import relay @@ -55,57 +65,70 @@ max_pool2d operator). output_shape = (1, 7, 7, 512) data = relay.var('data', shape=data_shape, dtype=data_type) - out = relay.nn.max_pool2d(data, pool_size=pool_size, strides=strides, - layout=layout, padding=padding) + out = relay.nn.max_pool2d(data, pool_size=pool_size, strides=strides, layout=layout, padding=padding) module = tvm.IRModule.from_expr(out) - ``` -2. Annotate and partition the graph for ACL. - ``` - module = relay.transform.AnnotateTarget("acl")(module) - module = relay.transform.PartitionGraph()(module) - ``` -3. Build the Relay graph. - ``` - target = "llvm -target=aarch64-linux-gnu -mattr=+neon" - with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): - json, lib, params = relay.build(module, target=target) - ``` -4. Export the module. - ``` + + +Annotate and partition the graph for ACL. + +.. code:: python + + from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib + module = partition_for_arm_compute_lib(module) + + +Build the Relay graph. + +.. code:: python + + target = "llvm -mtriple=aarch64-linux-gnu -mattr=+neon" + with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): + json, lib, params = relay.build(module, target=target) + + +Export the module. + +.. code:: python + lib_path = '~/lib_acl.so' cross_compile = 'aarch64-linux-gnu-c++' lib.export_library(lib_path, cc=cross_compile) - ``` - 5. Run Inference. This must be on an Arm device. If compiling on x86 device and running on aarch64 - consider using the RPC mechanism. - ``` + + +Run Inference. This must be on an Arm device. If compiling on an x86 device and running on AArch64, +consider using the RPC mechanism.
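For the cross-compilation scenario mentioned above, the exported library can be pushed to the device over RPC. The following is a hedged sketch rather than part of the patch: the device address, port and upload path are placeholders, an RPC server is assumed to be running on the device, and ``json``, ``data_shape`` and ``data_type`` come from the earlier steps.

.. code:: python

    import numpy as np
    import tvm
    from tvm import rpc
    from tvm.contrib import graph_runtime

    remote = rpc.connect("192.168.0.100", 9090)  # placeholder address/port of the Arm device
    remote.upload("lib_acl.so")                  # library exported in the previous step
    rlib = remote.load_module("lib_acl.so")

    ctx = remote.cpu(0)
    gen_module = graph_runtime.create(json, rlib, ctx)
    d_data = np.random.uniform(0, 1, data_shape).astype(data_type)
    gen_module.set_input("data", d_data)
    gen_module.run()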
+ +.. code:: python + + ctx = tvm.cpu(0) + lib = tvm.runtime.load_module('lib_acl.so') gen_module = tvm.contrib.graph_runtime.create(json, lib, ctx) - d_data = np.random.uniform(0, 1, data_shape).astype(data_type) map_inputs = {'data': d_data} gen_module.set_input(**map_inputs) gen_module.run() - ``` -## More examples + +More examples +------------- The example above shows only a basic demonstration of how ACL can be used for offloading a single Maxpool2D. If you would like to see more examples for each implemented operator and for -networks refer to the tests: `tests/python/contrib/test_acl`. Here you can modify +networks refer to the tests: `tests/python/contrib/test_arm_compute_lib`. Here you can modify `infrastructure.py` to use the remote device you have set up. -## Adding a new operator + +Adding a new operator +--------------------- Adding a new operator requires changes to a series of places. This section will give a hint on what needs to be changed and where; however, it will not dive into the complexities of an individual operator. This is left to the developer. There are a series of files we need to make changes to: -* `python/relay/op/contrib/acl.py` In this file we define the operators we wish to offload using the +* `python/tvm/relay/op/contrib/arm_compute_lib.py` In this file we define the operators we wish to offload using the `op.register` decorator. This will mean the annotation pass recognizes this operator as ACL offloadable. -* `src/relay/backend/contrib/acl/codegen_acl.h` Implement `Make[OpName]` method. This is where we +* `src/relay/backend/contrib/arm_compute_lib/codegen_acl.h` Implement `Make[OpName]` method. This is where we declare how the operator should be represented by JSON. This will be used to create the ACL module. -* `src/runtime/contrib/acl/acl_kernel.h` Implement `Create[OpName]Layer` method. This is where we +* `src/runtime/contrib/arm_compute_lib/acl_kernel.h` Implement `Create[OpName]Layer` method. This is where we define how the JSON representation can be used to create an ACL function. We simply define how to translate from the JSON representation to ACL API. -* `tests/python/contrib/test_acl` Add unit tests for the given operator. +* `tests/python/contrib/test_arm_compute_lib` Add unit tests for the given operator. diff --git a/python/tvm/relay/op/contrib/__init__.py b/python/tvm/relay/op/contrib/__init__.py index fad7183d92987..26ca78c1190b0 100644 --- a/python/tvm/relay/op/contrib/__init__.py +++ b/python/tvm/relay/op/contrib/__init__.py @@ -18,6 +18,6 @@ """Contrib modules.""" from .register import get_pattern_table, register_pattern_table -from .acl import * +from .arm_compute_lib import * from .dnnl import * from .coreml import * diff --git a/python/tvm/relay/op/contrib/acl.py b/python/tvm/relay/op/contrib/arm_compute_lib.py similarity index 83% rename from python/tvm/relay/op/contrib/acl.py rename to python/tvm/relay/op/contrib/arm_compute_lib.py index 8207575460450..11f22f0bfcde7 100644 --- a/python/tvm/relay/op/contrib/acl.py +++ b/python/tvm/relay/op/contrib/arm_compute_lib.py @@ -24,7 +24,7 @@ from .register import register_pattern_table -def is_acl_runtime_present(): +def is_arm_compute_runtime_present(): """Check if the ACL graph runtime is present. Returns ------- ret: bool True if present, False if not.
""" - return tvm.get_global_func("relay.op.is_acl_runtime_enabled", True) + return tvm.get_global_func("relay.op.is_arm_compute_runtime_enabled", True) -def partition_for_acl(mod, params=None): +def partition_for_arm_compute_lib(mod, params=None): """Partition the graph greedily offloading supported - operators to ACL. + operators to Arm Compute Library. Parameters ---------- @@ -54,13 +54,13 @@ def partition_for_acl(mod, params=None): mod['main'] = bind_params_by_name(mod['main'], params) seq = tvm.transform.Sequential([transform.MergeComposite(pattern_table()), - transform.AnnotateTarget('acl'), + transform.AnnotateTarget('arm_compute_lib'), transform.PartitionGraph()]) return seq(mod) -@register_pattern_table("acl") +@register_pattern_table("arm_compute_lib") def pattern_table(): """Get the ACL pattern table.""" @@ -85,11 +85,11 @@ def check_conv(extract): call = call.args[0] return conv2d(call.attrs, call.args) - return [('acl.conv2d', conv_pattern(), check_conv)] + return [('arm_compute_lib.conv2d', conv_pattern(), check_conv)] def _register_external_op_helper(op_name, supported=True): - @tvm.ir.register_op_attr(op_name, "target.acl") + @tvm.ir.register_op_attr(op_name, "target.arm_compute_lib") def _func_wrapper(attrs, args): return supported @@ -99,26 +99,20 @@ def _func_wrapper(attrs, args): _register_external_op_helper("reshape") -@tvm.ir.register_op_attr("nn.conv2d", "target.acl") +@tvm.ir.register_op_attr("nn.conv2d", "target.arm_compute_lib") def conv2d(attrs, args): """Check if the external ACL codegen for conv2d should be used.""" - - # ACL only supports group size of 1 if attrs.groups != 1: return False - - # ACL only supports NHWC layout if attrs.data_layout != "NHWC": return False return True -@tvm.ir.register_op_attr("nn.max_pool2d", "target.acl") +@tvm.ir.register_op_attr("nn.max_pool2d", "target.arm_compute_lib") def max_pool2d(attrs, args): """Check if the external ACL codegen for maxpool2d should be used.""" - - # ACL only supports NHWC layout if attrs.layout != "NHWC": return False diff --git a/src/relay/backend/contrib/acl/acl_api.cc b/src/relay/backend/contrib/acl/acl_api.cc deleted file mode 100644 index 5e3aa9c5679ef..0000000000000 --- a/src/relay/backend/contrib/acl/acl_api.cc +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/backend/contrib/acl/acl_api.cc - * \brief A common JSON interface between relay and the ACL runtime module. 
- */ - -#include "acl_api.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace acl { - -std::pair> DeserializeSubgraph( - std::string* serialized_function) { - dmlc::MemoryStringStream mstrm(serialized_function); - dmlc::Stream* strm = &mstrm; - std::string serialized_json; - strm->Read(&serialized_json); - std::istringstream is(serialized_json); - dmlc::JSONReader reader(&is); - JSONSubGraph function; - function.Load(&reader); - std::vector constants; - size_t const_count; - strm->Read(&const_count); - for (size_t i = 0; i < const_count; i++) { - runtime::NDArray temp; - temp.Load(strm); - constants.push_back(temp); - } - return std::make_pair(function, constants); -} - -std::string SerializeSubgraph(const JSONSubGraph& subgraph, - const std::vector& constants) { - std::ostringstream os; - dmlc::JSONWriter writer(&os); - subgraph.Save(&writer); - std::string serialized_subgraph; - dmlc::MemoryStringStream mstrm(&serialized_subgraph); - dmlc::Stream* strm = &mstrm; - strm->Write(os.str()); - strm->Write(constants.size()); - for (const auto& it : constants) { - it.Save(strm); - } - return serialized_subgraph; -} - -} // namespace acl -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/acl/acl_api.h b/src/relay/backend/contrib/acl/acl_api.h deleted file mode 100644 index 60ea03e5b3fe4..0000000000000 --- a/src/relay/backend/contrib/acl/acl_api.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/backend/contrib/acl/acl_api.h - * \brief A common JSON interface between relay and the ACL runtime module. - */ - -#ifndef TVM_RELAY_BACKEND_CONTRIB_ACL_ACL_API_H_ -#define TVM_RELAY_BACKEND_CONTRIB_ACL_ACL_API_H_ - -#include -#include -#include - -#include -#include -#include -#include - -namespace tvm { -namespace relay { -namespace contrib { -namespace acl { - -DMLC_JSON_ENABLE_ANY(std::vector, IntVector); -DMLC_JSON_ENABLE_ANY(int, Int); -DMLC_JSON_ENABLE_ANY(size_t, Size_t); -DMLC_JSON_ENABLE_ANY(std::string, String); - -/*! - * JSON interface for ACL tensor. - */ -class JSONTensor { - public: - JSONTensor() = default; - explicit JSONTensor(std::vector shape) : type("var"), shape(std::move(shape)) {} - - JSONTensor(std::string type, std::vector shape) - : type(std::move(type)), shape(std::move(shape)) {} - - void Save(dmlc::JSONWriter* writer) const { - writer->BeginObject(); - writer->WriteObjectKeyValue("type", type); - writer->WriteObjectKeyValue("shape", shape); - writer->EndObject(); - } - - void Load(dmlc::JSONReader* reader) { - dmlc::JSONObjectReadHelper helper; - helper.DeclareField("type", &type); - helper.DeclareField("shape", &shape); - helper.ReadAllFields(reader); - } - - /*! 
\brief The type of the tensor var/const. */ - std::string type; - /*! \brief The shape of the tensor. */ - std::vector shape; -}; - -/*! - * JSON interface for an ACL operator. - */ -class JSONOp { - public: - JSONOp() = default; - explicit JSONOp(std::string name) : name(std::move(name)) {} - - void Save(dmlc::JSONWriter* writer) const { - auto op_attrs = attrs; - op_attrs["num_inputs"] = dmlc::any(inputs.size()); - op_attrs["num_outputs"] = dmlc::any(outputs.size()); - writer->BeginObject(); - writer->WriteObjectKeyValue("name", name); - writer->WriteObjectKeyValue("inputs", inputs); - writer->WriteObjectKeyValue("outputs", outputs); - writer->WriteObjectKeyValue("attrs", op_attrs); - writer->EndObject(); - } - - void Load(dmlc::JSONReader* reader) { - dmlc::JSONObjectReadHelper helper; - helper.DeclareField("name", &name); - helper.DeclareField("inputs", &inputs); - helper.DeclareField("outputs", &outputs); - helper.DeclareField("attrs", &attrs); - helper.ReadAllFields(reader); - } - - /*! The name of the operator. */ - std::string name; - /*! The required variable inputs to the operator. */ - std::vector inputs; - /*! The required outputs to the operator. */ - std::vector outputs; - /*! The attributes of the operator e.g. padding, strides, etc. */ - std::unordered_map attrs; -}; - -/*! - * JSON interface for a series of ACL ops. - */ -class JSONSubGraph { - public: - JSONSubGraph() = default; - explicit JSONSubGraph(JSONOp op) : op(std::move(op)) {} - - void Save(dmlc::JSONWriter* writer) const { - writer->BeginObject(); - writer->WriteObjectKeyValue("node", op); - writer->EndObject(); - } - - void Load(dmlc::JSONReader* reader) { - dmlc::JSONObjectReadHelper helper; - helper.DeclareField("node", &op); - helper.ReadAllFields(reader); - } - - /*! \brief JSON op to be serialized. */ - JSONOp op; -}; - -/*! - * \brief Deserialize a function (or subgraph). The function is serialized in the - * format: Serialized JSON (using dmlc::JSONWriter), number of constants, serialized - * NDArray constants. - * - * \param serialized_function Pointer to a serialized function (or subgraph). - * \return A pair consisting of deserialized json subgraph object and deserialized - * NDArray. - */ -std::pair> DeserializeSubgraph( - std::string* serialized_function); - -/*! - * \brief Serialize a single subgraph which can be saved to disk. - * - * A subgraph is serialized so that the output is as follows: - * - Serialized JSON. - * - Number of constant tensors. - * - Serialized constant tensors. - * - * \param subgraph JSON subgraph representation. - * \constants Serialized JSON constants. - */ -std::string SerializeSubgraph(const JSONSubGraph& subgraph, - const std::vector& constants); - -} // namespace acl -} // namespace contrib -} // namespace relay -} // namespace tvm - -#endif // TVM_RELAY_BACKEND_CONTRIB_ACL_ACL_API_H_ diff --git a/src/relay/backend/contrib/acl/codegen.cc b/src/relay/backend/contrib/acl/codegen.cc deleted file mode 100644 index 1c61a6b09fce4..0000000000000 --- a/src/relay/backend/contrib/acl/codegen.cc +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/backend/contrib/acl/codegen_acl.cc - * \brief Implementation of the Relay -> ACL JSON schema compiler. - */ -#include -#include - -#include "../../utils.h" -#include "codegen_acl.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace acl { - -void CodegenACL::VisitLeaf(const Expr& expr) { - if (expr->IsInstance()) { - const auto* constant_node = expr.as(); - this->constants_.push_back(constant_node->data); - } else if (!expr->IsInstance()) { - // Don't enter functions - MixedModeVisitor::VisitLeaf(expr); - } -} - -void CodegenACL::VisitExpr_(const CallNode* node) { - Call call = GetRef(node); - if (this->layer_table_.find(call) == this->layer_table_.end()) { - for (const auto& arg : call->args) { - this->VisitExpr(arg); - } - // Determine call -> ACL mapping - JSONOp layer; - if (IsAclFunc(node, "acl.conv2d") || backend::IsOp(node, "nn.conv2d")) { - layer = MakeConvolutionOp(call); - } else if (backend::IsOp(node, "nn.max_pool2d")) { - layer = MakeMaxPool2DOp(call); - } else if (backend::IsOp(node, "reshape")) { - layer = MakeReshapeOp(call); - } else { - LOG(FATAL) << "Unsupported op: " << AsText(node->op, false); - } - this->layer_table_[call] = layer; - } -} - -runtime::Module CodegenACL::CreateRuntimeModule(const ObjectRef& ref) { - std::vector> serialized_functions; - if (ref->IsInstance()) { - IRModule mod; - Function func = Downcast(ref); - auto name_node = func->GetAttr(tvm::attr::kGlobalSymbol); - CHECK(name_node.defined()) << "Failed to retrieve external symbol"; - mod->Add(GlobalVar(name_node.value()), func); - mod = this->PreProcessModule(mod); - for (const auto& it : mod->functions) { - this->SerializeFunction(it.second, &serialized_functions); - } - } else { - LOG(FATAL) << "The input ref is expected to be a Relay function."; - } - std::string data; - dmlc::MemoryStringStream fs(&data); - dmlc::SeekStream* strm = &fs; - strm->Write(serialized_functions.size()); - for (const auto& it : serialized_functions) { - strm->Write(it.first); - strm->Write(it.second); - } - strm->Seek(0); - std::string make_acl_module = "runtime.module.loadbinary_acl"; - auto pf = tvm::runtime::Registry::Get(make_acl_module); - if (pf) { - return (*pf)(strm); - } else { - return runtime::Module(); - } -} - -JSONSubGraph CodegenACL::CreateJSONSubgraph(const Function& func) { - Expr body = func->body; - this->layer_table_.clear(); - this->constants_.clear(); - this->VisitExpr(body); - std::vector ops; - for (const auto& it : this->layer_table_) { - ops.push_back(it.second); - } - CHECK_EQ(layer_table_.size(), 1) << "ACL codegen expects only a single op per function."; - return JSONSubGraph(ops[0]); -} - -void CodegenACL::SerializeFunction( - const ObjectRef& ref, std::vector>* serialized_functions) { - Function func = Downcast(ref); - JSONSubGraph subgraph = this->CreateJSONSubgraph(func); - const auto name_node = func->GetAttr(tvm::attr::kGlobalSymbol); - CHECK(name_node != "") << "Fail to retrieve external symbol"; - std::string serialized_pair = SerializeSubgraph(subgraph, this->constants_); - 
serialized_functions->emplace_back(name_node.value(), serialized_pair); -} - -IRModule CodegenACL::PreProcessModule(const IRModule& mod) { - IRModule preprocessed_module; - tvm::Map> desired_layouts = { - {"nn.conv2d", {String("NHWC"), String("OHWI")}}}; - preprocessed_module = transform::ConvertLayout(desired_layouts)(mod); - preprocessed_module = transform::FoldConstant()(preprocessed_module); - return preprocessed_module; -} - -JSONOp CodegenACL::MakeConvolutionOp(const Call& call) { - JSONOp op("conv2d"); - const CallNode* pad = nullptr; - const CallNode* conv; - const CallNode* bias = nullptr; - bool has_activation = false; - if (call->op->IsInstance()) { - Expr composite_conv = GetCompositeExpr(call); - // Unpack composite function - const auto* current_call = composite_conv.as(); - if (backend::IsOp(current_call, "nn.relu")) { - has_activation = true; - current_call = current_call->args[0].as(); - } - if (backend::IsOp(current_call, "nn.bias_add")) { - bias = current_call; - current_call = current_call->args[0].as(); - } - CHECK(backend::IsOp(current_call, "nn.conv2d")); - conv = current_call; - if (!current_call->args.empty() && current_call->args[0]->IsInstance()) { - current_call = current_call->args[0].as(); - if (backend::IsOp(current_call, "nn.pad")) { - pad = current_call; - } - } - } else { - conv = call.as(); - } - const auto* conv_attr = conv->attrs.as(); - CHECK(conv_attr); - CHECK(conv_attr->kernel_layout == "OHWI") - << "Kernel layout must be OHWI, has the module been pre-processed correctly?"; - if (pad) { - op.inputs.push_back(MakeJSONTensor(pad->args[0])); - } else { - op.inputs.push_back(MakeJSONTensor(conv->args[0])); - } - op.inputs.push_back(MakeJSONConstTensor(conv->args[1])); - op.outputs.push_back(MakeJSONTensor(GetRef(conv))); - if (bias) { - op.inputs.push_back(MakeJSONConstTensor(bias->args[1])); - } - // It seems there are two different methods for padding a convolution: - // - using nn.pad operator before convolution - // - using conv2d_attrs to add padding - // - // Cover both cases here. 
- std::vector padding; - if (pad) { - const auto* pad_attr = pad->attrs.as(); - CHECK(pad_attr); - padding = GetPadVector(pad_attr->pad_width); - } else { - padding = GetPadVector(conv_attr->padding); - } - op.attrs["padding"] = padding; - op.attrs["groups"] = conv_attr->groups; - op.attrs["strides"] = ToVector(conv_attr->strides); - if (has_activation) op.attrs["activation_type"] = std::string("relu"); - return op; -} - -JSONOp CodegenACL::MakeMaxPool2DOp(const Call& call) { - JSONOp op("max_pool"); - const auto* attr = call->attrs.as(); - CHECK(attr); - op.inputs.push_back(MakeJSONTensor(call->args[0])); - op.outputs.push_back(MakeJSONTensor(call)); - op.attrs["padding"] = GetPadVector(attr->padding); - op.attrs["strides"] = ToVector(attr->strides); - op.attrs["pooling_type"] = std::string("max"); - op.attrs["pool_size"] = ToVector(attr->pool_size); - return op; -} - -JSONOp CodegenACL::MakeReshapeOp(const Call& call) { - JSONOp op("reshape"); - const auto* attr = call->attrs.as(); - CHECK(attr); - op.inputs.push_back(MakeJSONTensor(call->args[0])); - op.outputs.push_back(MakeJSONTensor(call)); - return op; -} - -JSONTensor CodegenACL::MakeJSONTensor(const Expr& expr) { - const auto* ttnode = expr->checked_type().as(); - CHECK(ttnode); - std::vector shape = ToVector(ttnode->shape); - return JSONTensor("var", shape); -} - -JSONTensor CodegenACL::MakeJSONConstTensor(const Expr& expr) { - const auto* ttnode = expr->checked_type().as(); - CHECK(ttnode); - std::vector shape = ToVector(ttnode->shape); - VisitExpr(expr); - return JSONTensor("const", shape); -} - -bool CodegenACL::IsAclFunc(const CallNode* call, const std::string& op_name) const { - if (call->op->IsInstance()) { - Function func = Downcast(call->op); - CHECK(func.defined()); - auto name_node = func->GetAttr(attr::kComposite); - return name_node.value() == op_name; - } - return false; -} - -Expr CodegenACL::GetCompositeExpr(const Call& call) { - Function composite_function = Downcast(call->op); - Expr composite_expr = composite_function->body; - CHECK(composite_expr->IsInstance()); - return composite_expr; -} - -std::vector CodegenACL::ToVector(const Array& array) { - std::vector stl_vector; - for (auto it : array) { - const auto* val = it.as(); - CHECK(val); - stl_vector.push_back(val->value); - } - return stl_vector; -} - -std::vector CodegenACL::GetPadVector(const Array>& pad) { - // TVM nn.pad: top, bottom, left, right -> ACL Pad: left, right, top, bottom - auto acl_pad = {pad[2][0], pad[2][1], pad[1][0], pad[1][1]}; - return ToVector(acl_pad); -} - -std::vector CodegenACL::GetPadVector(const Array& pad) { - Array acl_pad; - switch (pad.size()) { - case 1: - acl_pad = {pad[0], pad[0], pad[0], pad[0]}; - break; - case 2: - // TVM Pad: height, width -> ACL Pad: left, right, top, bottom - acl_pad = {pad[1], pad[1], pad[0], pad[0]}; - break; - case 4: - // TVM Pad: top, left, bottom, right -> ACL Pad: left, right, top, bottom - acl_pad = {pad[1], pad[3], pad[0], pad[2]}; - break; - default: - LOG(FATAL) << "Unsupported padding dimensions"; - } - return ToVector(acl_pad); -} - -} // namespace acl -} // namespace contrib -} // namespace relay -} // namespace tvm diff --git a/src/relay/backend/contrib/acl/codegen_acl.h b/src/relay/backend/contrib/acl/codegen_acl.h deleted file mode 100644 index 23efb09521b2a..0000000000000 --- a/src/relay/backend/contrib/acl/codegen_acl.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/backend/contrib/acl/codegen_acl.h - * \brief The Relay -> ACL JSON schema compiler. - */ - -#ifndef TVM_RELAY_BACKEND_CONTRIB_ACL_CODEGEN_ACL_H_ -#define TVM_RELAY_BACKEND_CONTRIB_ACL_CODEGEN_ACL_H_ - -#include -#include -#include - -#include -#include -#include -#include - -#include "acl_api.h" - -namespace tvm { -namespace relay { -namespace contrib { -namespace acl { - -/*! - * \brief Generates an ACLModule from a relay expression. This "compilation" - * does not require ACL since the actual conversion using ACL APIs is - * deferred until creation of the runtime. This step simply serializes the - * relay program into a JSON string. - */ -class CodegenACL : public MixedModeVisitor { - public: - CodegenACL() = default; - void VisitExpr_(const CallNode* node) final; - void VisitLeaf(const Expr& expr) final; - - /*! - * \brief Create a runtime module for ACL. - * - * This consists of a series of "serialized functions" which each represent a - * subgraph to be computed by ACL and will each be executed independently from - * one another. Each function consists of serialized JSON describing the subgraph - * and serialized constant tensors. - * - * \note The ACL runtime module only currently supports a single operator per - * subgraph currently. - * - * \param ref The ext_func Relay expression/module to be executed using extern ops. - * \return A runtime module. - */ - runtime::Module CreateRuntimeModule(const ObjectRef& ref); - - /*! - * \brief Create a JSON representation of a subgraph. - * - * \param func The function to be represented. - * \return A JSON representation of the function. - */ - JSONSubGraph CreateJSONSubgraph(const Function& func); - - private: - /*! - * \brief Serialize a single subgraph which can be saved to disk. - * - * A subgraph is serialized so that the output is as follows. - * - Serialized JSON. - * - Number of constant tensors. - * - Serialized constant tensors. - * - * \param ref Reference to the function to be serialized. - * \param serialized_functions A vector of serialized functions to add to. - */ - void SerializeFunction(const ObjectRef& ref, - std::vector>* serialized_functions); - - /*! - * \brief Pre-process a module containing functions ready for ACL codegen. - * - * For now we enforce OHWI kernel layout and fold the transforms away. - * - * \param mod The module to be pre-processed. - * \return The processed module. - */ - IRModule PreProcessModule(const IRModule& mod); - - /*! - * \brief Create a JSON representation of an operator. - * - * \param call The call to be represented. - * \return A JSON representation of a specific operator. - */ - JSONOp MakeConvolutionOp(const Call& call); - static JSONOp MakeMaxPool2DOp(const Call& call); - static JSONOp MakeReshapeOp(const Call& call); - - /*! 
- * \brief Make a JSON representation of a (constant)tensor. - * - * \param expr Expression of a tensor to be represented. - * \return A JSON representation of a tensor. - */ - static JSONTensor MakeJSONTensor(const Expr& expr); - JSONTensor MakeJSONConstTensor(const Expr& expr); - - /*! - * \brief Check whether CallNode is a composite function and has the same - * op_name. - * - * \param call The current call node. - * \param op_name The expected name of the call node to check. - * \return True if the call node is composite and has the same name as - * op_name, False otherwise. - */ - bool IsAclFunc(const CallNode* call, const std::string& op_name) const; - - /*! - * \brief Get composite expression from call node. - * - * \param call The call node to get expression from. - * \return Expression for composite function. - */ - static Expr GetCompositeExpr(const Call& call); - - /*! - * \brief Convert a relay array to std::vector. - * - * \param array A relay array to be converted. - * \return std::vector. - */ - static std::vector ToVector(const Array& array); - - /*! - * \brief Create a padding vector compatible with ACL. - * - * Currently TVM has many ways to pad a an operator, so each method is taken care of here. - * - * \param pad Padding array. - * \return ACL compatible padding vector. - */ - static std::vector GetPadVector(const Array>& pad); - static std::vector GetPadVector(const Array& pad); - - /*! \brief A vector of constants to be serialized after the JSON representation is constructed. */ - std::vector constants_; - /*! \brief A look-up table from Expr to JSONOp. */ - std::map layer_table_; -}; - -/*! - * \brief The external ACL compiler/codegen tool. It takes a Relay - * expression/module and compiles it into a runtime module. - */ -runtime::Module ACLCompiler(const ObjectRef& ref) { - CodegenACL acl_codegen; - return acl_codegen.CreateRuntimeModule(ref); -} - -TVM_REGISTER_GLOBAL("relay.ext.acl").set_body_typed(ACLCompiler); - -/*! - * \brief Check whether ACL graph runtime is used. - * \return True if ACL graph runtime is enabled, False if not. - */ -inline constexpr bool IsACLRuntimeEnabled() { -#if TVM_GRAPH_RUNTIME_ACL - return true; -#else - return false; -#endif -} - -TVM_REGISTER_GLOBAL("relay.op.is_acl_runtime_enabled").set_body_typed(IsACLRuntimeEnabled); - -} // namespace acl -} // namespace contrib -} // namespace relay -} // namespace tvm - -#endif // TVM_RELAY_BACKEND_CONTRIB_ACL_CODEGEN_ACL_H_ diff --git a/src/relay/backend/contrib/arm_compute_lib/codegen.cc b/src/relay/backend/contrib/arm_compute_lib/codegen.cc new file mode 100644 index 0000000000000..8017906ea25a8 --- /dev/null +++ b/src/relay/backend/contrib/arm_compute_lib/codegen.cc @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +/*! + * \file src/relay/backend/contrib/arm_compute_lib/codegen_acl.cc + * \brief Implementation of the Relay -> ACL JSON serializer. + */ +#include +#include +#include + +#include "../../utils.h" +#include "codegen_acl.h" + +namespace tvm { +namespace relay { +namespace contrib { +namespace arm_compute_lib { + +using JSONGraphNode = tvm::runtime::json::JSONGraphNode; +using JSONGraphNodeEntry = tvm::runtime::json::JSONGraphNodeEntry; + +std::vector ACLJSONSerializer::VisitExpr_(const CallNode* cn) { + Expr expr = GetRef(cn); + std::string name; + std::shared_ptr json_node; + + if (cn->op.as()) { + json_node = CreateOp(cn); + } else if (const auto* fn = cn->op.as()) { + auto comp = fn->GetAttr(attr::kComposite); + CHECK(comp.defined()) << "Arm Compute Library JSON runtime only supports composite functions."; + name = comp.value(); + if (name == "arm_compute_lib.conv2d") { + json_node = CreateCompositeConvolution(cn); + } else { + LOG(FATAL) << "Unrecognized Arm Compute Library pattern: " << name; + } + } else { + LOG(FATAL) << "Arm Compute Library JSON runtime does not support calls to " + << cn->op->GetTypeKey(); + } + + return AddNode(json_node, GetRef(cn)); +} + +std::vector ACLJSONSerializer::VisitExpr_(const ConstantNode* cn) { + this->constants_.push_back(cn->data); + return JSONSerializer::VisitExpr_(cn); +} + +std::shared_ptr ACLJSONSerializer::CreateOp(const CallNode* cn) { + const auto* op = cn->op.as(); + CHECK(op); + const std::string name = op->name; + // Collect inputs + std::vector inputs; + for (const auto& arg : cn->args) { + auto res = VisitExpr(arg); + inputs.insert(inputs.end(), res.begin(), res.end()); + } + // Create JSON op + auto json_node = std::make_shared(name, "kernel", inputs, 1); + SetCallNodeAttribute(json_node, cn); + return json_node; +} + +std::shared_ptr ACLJSONSerializer::CreateCompositeConvolution(const CallNode* cn) { + const std::string name = "arm_compute_lib.conv2d"; + const CallNode* pad = nullptr; + const CallNode* conv; + const CallNode* bias = nullptr; + bool has_activation = false; + + // Unpack composite function + const auto* fn = cn->op.as(); + CHECK(fn); + const auto* current_call = fn->body.as(); + if (backend::IsOp(current_call, "nn.relu")) { + has_activation = true; + current_call = current_call->args[0].as(); + } + if (backend::IsOp(current_call, "nn.bias_add")) { + bias = current_call; + current_call = current_call->args[0].as(); + } + CHECK(backend::IsOp(current_call, "nn.conv2d")); + conv = current_call; + if (!current_call->args.empty() && current_call->args[0]->IsInstance()) { + current_call = current_call->args[0].as(); + if (backend::IsOp(current_call, "nn.pad")) { + pad = current_call; + } + } + + const auto* conv_attr = conv->attrs.as(); + CHECK(conv_attr); + CHECK(conv_attr->kernel_layout == "OHWI") + << "Kernel layout must be OHWI, has the module been pre-processed correctly?"; + + std::vector inputs; + inputs.push_back(VisitExpr(cn->args[0])[0]); + inputs.push_back(VisitExpr(conv->args[1])[0]); + if (bias) { + inputs.push_back(VisitExpr(bias->args[1])[0]); + } + + auto json_node = std::make_shared(name, "kernel", inputs, 1); + SetCallNodeAttribute(json_node, conv); + + // Override attributes + if (pad) { + const auto* pad_attr = pad->attrs.as(); + CHECK(pad_attr); + auto p = pad_attr->pad_width; + // Convert to TVM layout for now, conversion to ACL layout takes place in runtime. + // Standard convolution pad layout for TVM: top, left, bottom, right. 
+ std::vector padding = {std::to_string(p[1][0].as()->value), + std::to_string(p[2][0].as()->value), + std::to_string(p[1][1].as()->value), + std::to_string(p[2][1].as()->value)}; + std::vector padding_attr; + padding_attr.emplace_back(padding); + json_node->SetAttr("padding", padding_attr); + } + if (has_activation) { + std::vector activation_type = {"relu"}; + std::vector act_attr; + act_attr.emplace_back(activation_type); + json_node->SetAttr("activation_type", act_attr); + } + return json_node; +} + +Array ACLJSONSerializer::GetParamsData() { return constants_; } + +IRModule PreProcessModule(const IRModule& mod) { + IRModule preprocessed_module; + tvm::Map> desired_layouts = { + {"nn.conv2d", {String("NHWC"), String("OHWI")}}}; + preprocessed_module = transform::ConvertLayout(desired_layouts)(mod); + preprocessed_module = transform::FoldConstant()(preprocessed_module); + return preprocessed_module; +} + +runtime::Module ACLCompiler(const ObjectRef& ref) { + CHECK(ref->IsInstance()) << "The input ref is expected to be a Relay function."; + Function func = Downcast(ref); + std::string func_name = GetExtSymbol(func); + + IRModule mod; + mod->Add(GlobalVar(func_name), func); + mod = PreProcessModule(mod); + + CHECK(mod->functions.size() == 1) << "Module should only contain single function"; + Function processed_func = Downcast(mod->functions.begin().operator*().second); + + ACLJSONSerializer serializer(func_name, processed_func); + serializer.serialize(); + std::string graph_json = serializer.GetJSON(); + auto param_names = serializer.GetParams(); + auto param_data = serializer.GetParamsData(); + const auto* pf = runtime::Registry::Get("runtime.arm_compute_lib_runtime_create"); + CHECK(pf != nullptr) << "Cannot find JSON runtime module to create"; + runtime::Module lib = (*pf)(func_name, graph_json, param_names, param_data); + return lib; +} + +} // namespace arm_compute_lib +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/relay/backend/contrib/arm_compute_lib/codegen_acl.h b/src/relay/backend/contrib/arm_compute_lib/codegen_acl.h new file mode 100644 index 0000000000000..277d807a3f2f7 --- /dev/null +++ b/src/relay/backend/contrib/arm_compute_lib/codegen_acl.h @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/arm_compute_lib/codegen_acl.h + * \brief The Relay -> ACL JSON schema compiler. 
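`PreProcessModule` above fixes the layouts the ACL runtime expects (NHWC data, OHWI kernels) and folds the resulting layout transforms away before the function is serialized to JSON. A rough Python equivalent of that pass sequence can be useful for inspecting what the codegen will see; this is a hedged sketch applied to an existing `IRModule` named `mod`, not part of the patch:

```python
import tvm
from tvm import relay

# Mirror PreProcessModule: convert conv2d to NHWC/OHWI, then fold the inserted transforms.
desired_layouts = {"nn.conv2d": ["NHWC", "OHWI"]}
with tvm.transform.PassContext(opt_level=3):
    mod = relay.transform.ConvertLayout(desired_layouts)(mod)
    mod = relay.transform.FoldConstant()(mod)
```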
+ */ + +#ifndef TVM_RELAY_BACKEND_CONTRIB_ARM_COMPUTE_LIB_CODEGEN_ACL_H_ +#define TVM_RELAY_BACKEND_CONTRIB_ARM_COMPUTE_LIB_CODEGEN_ACL_H_ + +#include + +#include +#include +#include +#include +#include + +#include "../../../../runtime/contrib/json/json_node.h" +#include "../codegen_json/codegen_json.h" + +namespace tvm { +namespace relay { +namespace contrib { +namespace arm_compute_lib { + +/*! + * \brief Generates an ACLModule from a relay expression. This "compilation" + * does not require ACL since the actual conversion using ACL APIs is + * deferred until creation of the runtime. This step simply serializes the + * relay program into a JSON string. + */ +class ACLJSONSerializer : public backend::contrib::JSONSerializer { + using JSONGraphNode = tvm::runtime::json::JSONGraphNode; + using JSONGraphNodeEntry = tvm::runtime::json::JSONGraphNodeEntry; + + public: + ACLJSONSerializer(const std::string& symbol, const Expr& expr) : JSONSerializer(symbol, expr) {} + + std::vector VisitExpr_(const CallNode* cn) override; + std::vector VisitExpr_(const ConstantNode* cn) override; + + /*! + * \brief Get the constant data transposed when pre-processing the + * input function. + * + * \return An array of constants + */ + Array GetParamsData(); + + private: + /*! + * \brief Create a JSON representation of an operator. + * + * \param call The call to be represented. + * \return A JSON representation of a specific operator. + */ + std::shared_ptr CreateOp(const CallNode* cn); + std::shared_ptr CreateCompositeConvolution(const CallNode* cn); + + /* \brief Transposed constant tensors to serialize. Arm Compute Library expects constant tensors + * in OHWI format. */ + Array constants_; +}; + +/*! + * \brief Pre-process a module containing functions ready for ACL codegen. + * + * For now we enforce OHWI kernel layout and fold the transforms away. + * + * \param mod The module to be pre-processed. + * \return The processed module. + */ +IRModule PreProcessModule(const IRModule& mod); + +/*! + * \brief Create a runtime module for ACL. + * + * This consists of a series of "serialized functions" which each represent a + * sub-graph to be computed by ACL and will each be executed independently from + * one another. Each function consists of serialized JSON describing the sub-graph + * and serialized constant tensors. + * + * \note The ACL runtime module only currently supports a single operator per + * sub-graph currently. + * + * \param ref The ext_func Relay expression/module to be executed using extern ops. + * \return A runtime module. + */ +runtime::Module ACLCompiler(const ObjectRef& ref); + +/*! + * \brief Get the external symbol of the Relay function name. + * + * \param func The provided function. + * + * \return An external symbol. + */ +std::string GetExtSymbol(const Function& func) { + const auto name_node = func->GetAttr(tvm::attr::kGlobalSymbol); + CHECK(name_node.defined()) << "Fail to retrieve external symbol."; + return std::string(name_node.value()); +} + +TVM_REGISTER_GLOBAL("relay.ext.arm_compute_lib").set_body_typed(ACLCompiler); + +/*! + * \brief Check whether ACL graph runtime is used. + * \return True if ACL graph runtime is enabled, False if not. 
+ */ +inline constexpr bool IsACLRuntimeEnabled() { +#if TVM_GRAPH_RUNTIME_ACL + return true; +#else + return false; +#endif +} + +TVM_REGISTER_GLOBAL("relay.op.is_arm_compute_runtime_enabled").set_body_typed(IsACLRuntimeEnabled); + +} // namespace arm_compute_lib +} // namespace contrib +} // namespace relay +} // namespace tvm + +#endif // TVM_RELAY_BACKEND_CONTRIB_ARM_COMPUTE_LIB_CODEGEN_ACL_H_ diff --git a/src/runtime/contrib/acl/acl_kernel.cc b/src/runtime/contrib/acl/acl_kernel.cc deleted file mode 100644 index a87b1b525e2e5..0000000000000 --- a/src/runtime/contrib/acl/acl_kernel.cc +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/runtime/contrib/acl/acl_kernel.cc - * \brief TVM compatible wrappers for ACL kernels. - */ - -#include "acl_kernel.h" - -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace tvm { -namespace runtime { -namespace contrib { -namespace acl { - -CachedLayer::CachedLayer(const api::JSONSubGraph& function, const std::vector& constants, - ACLAllocator* allocator, - const std::shared_ptr& mm) - : constants_(constants), allocator_(allocator) { - api::JSONOp op = function.op; - // Make tensors - int const_tensor_idx = 0; - for (const auto& it : op.inputs) { - if (it.type == "const") { - this->function_.const_inputs.push_back(MakeTensor(it, constants[const_tensor_idx++]->data)); - } else if (it.type == "var") { - this->function_.inputs.push_back(MakeTensor(it)); - } else { - LOG(FATAL) << "Unsupported tensor type"; - } - } - for (const auto& it : op.outputs) { - this->function_.outputs.push_back(MakeTensor(it)); - } - // Create layer - if (op.name == "conv2d") { - CreateConvolution2DLayer(&this->function_, function.op, mm); - this->is_mm_ = true; - } else if (op.name == "max_pool") { - CreateMaxPoolLayer(&this->function_, function.op); - } else if (op.name == "reshape") { - CreateReshapeLayer(&this->function_, function.op); - } else { - LOG(FATAL) << "Operator not yet supported"; - } - // Prepare function - this->function_.function->prepare(); -} - -bool CachedLayer::Inference(const std::vector& inputs, - const std::vector& outputs) { - for (size_t i = 0; i < inputs.size(); i++) { - CheckACLError(function_.inputs[i].allocator()->import_memory(inputs[i]->data)); - } - for (size_t i = 0; i < outputs.size(); i++) { - CheckACLError(function_.outputs[i].allocator()->import_memory(outputs[i]->data)); - } - - this->function_.function->run(); - return true; -} - -size_t CachedLayer::GetNumInputs() const { return this->function_.inputs.size(); } - -void CachedLayer::CreateConvolution2DLayer(CacheItems* cache, const api::JSONOp& params, - const std::shared_ptr& mm) { - auto padding = dmlc::get>(params.attrs.at("padding")); - auto 
strides = dmlc::get>(params.attrs.at("strides")); - auto groups = dmlc::get(params.attrs.at("groups")); - - CHECK(groups == 1) << "ACL NEON Convolution only supports group size of 1"; - - acl::PadStrideInfo pad_stride_info = - acl::PadStrideInfo(strides[0], strides[1], padding[0], padding[1], padding[2], padding[3], - acl::DimensionRoundingType::FLOOR); - acl::ActivationLayerInfo act_info = acl::ActivationLayerInfo(); - if (params.attrs.find("activation_type") != params.attrs.end()) { - auto activation_function = dmlc::get(params.attrs.at("activation_type")); - - if (activation_function == "relu") { - act_info = acl::ActivationLayerInfo(acl::ActivationLayerInfo::ActivationFunction::RELU); - } else { - LOG(FATAL) << "Unsupported activation function"; - } - } - - auto function = std::make_shared(mm); - function->configure(&cache->inputs[0], &cache->const_inputs[0], - cache->const_inputs.size() > 1 ? &cache->const_inputs[1] : nullptr, - &cache->outputs[0], pad_stride_info, acl::WeightsInfo(), acl::Size2D(1U, 1U), - act_info); - - cache->function = function; -} - -void CachedLayer::CreateMaxPoolLayer(CacheItems* cache, const api::JSONOp& params) { - auto padding = dmlc::get>(params.attrs.at("padding")); - auto strides = dmlc::get>(params.attrs.at("strides")); - auto pool_size = dmlc::get>(params.attrs.at("pool_size")); - auto pooling_type = dmlc::get(params.attrs.at("pooling_type")); - - acl::PoolingType pool_type; - if (pooling_type == "max") { - pool_type = acl::PoolingType::MAX; - } else { - LOG(FATAL) << "Pooling type not supported"; - } - - acl::PadStrideInfo pad_stride_info = - acl::PadStrideInfo(strides[0], strides[1], padding[0], padding[1], padding[2], padding[3], - acl::DimensionRoundingType::FLOOR); - acl::PoolingLayerInfo pool_info = acl::PoolingLayerInfo( - pool_type, acl::Size2D(pool_size[0], pool_size[1]), acl::DataLayout::NHWC, pad_stride_info); - - auto function = std::make_shared(); - function->configure(&cache->inputs[0], &cache->outputs[0], pool_info); - - cache->function = function; -} - -void CachedLayer::CreateReshapeLayer(CacheItems* cache, const api::JSONOp& params) { - auto function = std::make_shared(); - function->configure(&cache->inputs[0], &cache->outputs[0]); - - cache->function = function; -} - -} // namespace acl -} // namespace contrib -} // namespace runtime -} // namespace tvm diff --git a/src/runtime/contrib/acl/acl_kernel.h b/src/runtime/contrib/acl/acl_kernel.h deleted file mode 100644 index 8ab8eaf229109..0000000000000 --- a/src/runtime/contrib/acl/acl_kernel.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/runtime/contrib/acl/acl_kernel.h - * \brief Use ACL library kernels, we create an interface to these. 
- */ - -#ifndef TVM_RUNTIME_CONTRIB_ACL_ACL_KERNEL_H_ -#define TVM_RUNTIME_CONTRIB_ACL_ACL_KERNEL_H_ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "../../../relay/backend/contrib/acl/acl_api.h" -#include "acl_allocator.h" -#include "acl_utils.h" - -namespace tvm { -namespace runtime { -namespace contrib { -namespace acl { - -namespace api = relay::contrib::acl; -namespace acl = arm_compute; - -/*! - * \brief ACL objects we cache in order to avoid needing to construct - * a new layer each time. - */ -struct CacheItems { - std::shared_ptr function; - std::vector inputs; - std::vector const_inputs; - std::vector outputs; -}; - -/*! - * \brief A cached ACL layer containing a single ACL function. - */ -class CachedLayer { - public: - /*! - * \brief Create an ACL layer from a JSON representation. Also prepare - * the layer for execution - this will perform actions such as pre- - * transposing of weights. - * - * \note The naming suggests a subgraph directly maps to a layer. - * In general this is not true, but since ACL only expects subgraphs - * consisting of a single op it is. - * - * \param function A JSON representation of a subgraph. - * \param constants The constants used in the subgraph. - * \param allocator ACL can request memory from TVM. - */ - CachedLayer(const api::JSONSubGraph& function, const std::vector& constants, - ACLAllocator* allocator, const std::shared_ptr& mm); - - /*! - * \brief Run inference on the ACL layer. - * - * \param inputs The inputs for the layer. - * \param outputs The outputs for the layer. - * \return True if success, False if not successful. - */ - bool Inference(const std::vector& inputs, const std::vector& outputs); - - /*! - * \brief Get the number of inputs the layer takes. - * - * \return Number of inputs. - */ - size_t GetNumInputs() const; - - /*! - * \brief Check if the layer requires working memory to be allocated. - * - * \return True if it does, False if not. - */ - bool IsMemoryManaged() const { return this->is_mm_; } - - private: - /*! \brief Constant tensors used in the layer. */ - std::vector constants_; - /*! \brief Cache ACL function and tensors for execution. */ - CacheItems function_; - /*! \brief ACL Allocator to request auxiliary memory from TVM. */ - ACLAllocator* allocator_; - /*! \brief Check if the function requires working memory to be allocated. */ - bool is_mm_ = false; - - /*! \brief Create individual ACL layer. */ - static void CreateConvolution2DLayer(CacheItems* cache, const api::JSONOp& params, - const std::shared_ptr& mm); - static void CreateMaxPoolLayer(CacheItems* cache, const api::JSONOp& params); - static void CreateReshapeLayer(CacheItems* cache, const api::JSONOp& params); -}; - -} // namespace acl -} // namespace contrib -} // namespace runtime -} // namespace tvm - -#endif // TVM_RUNTIME_CONTRIB_ACL_ACL_KERNEL_H_ diff --git a/src/runtime/contrib/acl/acl_runtime.cc b/src/runtime/contrib/acl/acl_runtime.cc deleted file mode 100644 index 1c372fe2c7e01..0000000000000 --- a/src/runtime/contrib/acl/acl_runtime.cc +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include -#include -#include -#include - -#include - -#include "../../../relay/backend/contrib/acl/acl_api.h" -#include "../../file_util.h" - -#ifdef TVM_GRAPH_RUNTIME_ACL -#include -#include -#include - -#include "acl_allocator.h" -#include "acl_kernel.h" -#endif - -namespace tvm { -namespace runtime { - -namespace api = relay::contrib::acl; - -class ACLModule : public ModuleNode { - public: - /*! - * \brief The ACL runtime module. Deserialize the provided functions - * on creation and store in the layer cache. - * - * \param serialized_graphs A vector of (external symbol, serialized JSON subgraph) pairs. - */ - explicit ACLModule(const std::vector>& serialized_functions) { -#ifdef TVM_GRAPH_RUNTIME_ACL - auto lifetime_mgr = std::make_shared(); - auto pool_mgr = std::make_shared(); - auto mm = std::make_shared(lifetime_mgr, pool_mgr); - int num_pools = 0; -#endif - - for (const auto& it : serialized_functions) { - std::string serialized_function = it.second; - auto ds = api::DeserializeSubgraph(&serialized_function); - this->deserialized_functions_.emplace_back(it.first, ds); - -#ifdef TVM_GRAPH_RUNTIME_ACL - this->subgraph_cache_[it.first] = - std::make_shared(ds.first, ds.second, &this->allocator_, mm); - if (this->subgraph_cache_[it.first]->IsMemoryManaged()) num_pools++; -#endif - } -#ifdef TVM_GRAPH_RUNTIME_ACL - // Allocate working memory for layers. - if (num_pools > 0) mm->populate(this->allocator_, num_pools); -#endif - } - - /*! - * \brief Get a PackedFunc from the ACL module. - * - * \param name The name of the function. - * \param sptr_to_self The ObjectPtr that points to this module node. - * \return The function pointer when it is found, otherwise, PackedFunc(nullptr). - */ - PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) final { -#ifdef TVM_GRAPH_RUNTIME_ACL - if (this->subgraph_cache_.find(name) != this->subgraph_cache_.end()) { - return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) { - *rv = tvm::runtime::ACLModule::Inference(args, this->subgraph_cache_[name].get()); - }); - } -#endif - return PackedFunc(nullptr); - } - - /*! - * \brief The type key of the module. - * - * \return module type key. - */ - const char* type_key() const override { return "acl"; } - - /*! - * \brief Unpack inputs and outputs and run inference on a given layer. - * - * \param args Access inputs and outputs. - * \param function The layer to execute inference on. - * \return Status of inference. - */ -#ifdef TVM_GRAPH_RUNTIME_ACL - static bool Inference(tvm::runtime::TVMArgs args, contrib::acl::CachedLayer* function) { - // Unpack parameters - int argc = 0; - std::vector inputs; - for (size_t i = 0; i < function->GetNumInputs(); i++) { - inputs.push_back(args[argc++]); - } - std::vector outputs; - for (; argc < args.size(); argc++) { - outputs.push_back(args[argc]); - } - return function->Inference(inputs, outputs); - } -#endif - - /*! - * \brief Save a compiled network to a binary stream, which can then be - * serialized to disk. - * - * \param stream The stream to save the binary. 
- */ - void SaveToBinary(dmlc::Stream* stream) final { - stream->Write(this->deserialized_functions_.size()); - for (const auto& it : this->deserialized_functions_) { - stream->Write(it.first); - std::pair> subgraph_pair = it.second; - std::string serialized_function = - api::SerializeSubgraph(subgraph_pair.first, subgraph_pair.second); - stream->Write(serialized_function); - } - } - - /*! - * \brief Load a compiled network from stream. - * - * \param strm The binary stream to load. - * \return The created ACL module. - */ - static Module LoadFromBinary(void* strm) { - auto stream = static_cast(strm); - size_t func_count; - stream->Read(&func_count); - std::vector> serialized_functions; - for (unsigned int i = 0; i < func_count; i++) { - std::string ext_symbol; - std::string serialized_function; - stream->Read(&ext_symbol); - stream->Read(&serialized_function); - serialized_functions.emplace_back(std::make_pair(ext_symbol, serialized_function)); - } - auto n = make_object(serialized_functions); - return Module(n); - } - - /*! - * \brief Save a module to a specified path. - * - * \param path Where to save the serialized module. - * \param format The format of the file. - */ - void SaveToFile(const std::string& path, const std::string& format) override { - std::string data; - dmlc::MemoryStringStream writer(&data); - dmlc::SeekStream* strm = &writer; - SaveToBinary(strm); - SaveBinaryToFile(path, data); - } - - /*! - * \brief Create a module from a file. - * - * \param path The path of the file containing the serialized module. - * \return The created ACL module. - */ - static Module LoadFromFile(const std::string& path) { - std::string data; - LoadBinaryFromFile(path, &data); - dmlc::MemoryStringStream reader(&data); - return LoadFromBinary(&reader); - } - - /*! - * \brief Get the JSON generated by codegen. - * - * \param format the format to return (only JSON for the time being) - * \return A string of JSON. - */ - std::string GetSource(const std::string& format) override { - std::ostringstream os; - dmlc::JSONWriter writer(&os); - writer.BeginObject(); - for (const auto& it : deserialized_functions_) { - writer.WriteObjectKeyValue(it.first, it.second.first); - } - writer.EndObject(); - return os.str(); - } - - private: - /* \brief A vector of (external symbol, serialized JSON subgraph) pairs. */ - std::vector>>> - deserialized_functions_; - -#ifdef TVM_GRAPH_RUNTIME_ACL - /* \brief A map between ext_symbols (function names) and an ACL subgraph. - * \note Currently only a single op per subgraph is supported. Hence mapping to - * cached layer.*/ - std::map> subgraph_cache_; - /*! \brief Allow ACL functions to request auxiliary memory from TVM. */ - contrib::acl::ACLAllocator allocator_; -#endif -}; - -TVM_REGISTER_GLOBAL("runtime.module.loadfile_acl").set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = ACLModule::LoadFromFile(args[0]); -}); - -TVM_REGISTER_GLOBAL("runtime.module.loadbinary_acl").set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = ACLModule::LoadFromBinary(args[0]); -}); - -} // namespace runtime -} // namespace tvm diff --git a/src/runtime/contrib/acl/acl_utils.cc b/src/runtime/contrib/acl/acl_utils.cc deleted file mode 100644 index 6e29cc384d404..0000000000000 --- a/src/runtime/contrib/acl/acl_utils.cc +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/runtime/contrib/acl/acl_utils.cc - * \brief Utils and common functions for the interface. - */ - -#include "acl_utils.h" - -#include -#include - -namespace tvm { -namespace runtime { -namespace contrib { -namespace acl { - -void CheckACLError(arm_compute::Status status) { - CHECK(status.error_code() == arm_compute::ErrorCode::OK) << "ACL: " << status.error_description(); -} - -acl::Tensor MakeTensor(const api::JSONTensor& tensor_rep, void* data) { - acl::Tensor tensor; - acl::TensorInfo info = MakeTensorInfo(tensor_rep); - tensor.allocator()->init(info); - if (data != nullptr) { - CheckACLError(tensor.allocator()->import_memory(data)); - } - return tensor; -} - -acl::TensorInfo MakeTensorInfo(const api::JSONTensor& tensor_rep) { - return acl::TensorInfo(MakeTensorShape(tensor_rep.shape), 1, acl::DataType::F32, - acl::DataLayout::NHWC); -} - -arm_compute::TensorShape MakeTensorShape(const std::vector& shape) { - arm_compute::TensorShape acl_shape; - for (unsigned int i = shape.size(); i > 0; --i) { - acl_shape.set(shape.size() - i, shape[i - 1]); - } - return acl_shape; -} - -} // namespace acl -} // namespace contrib -} // namespace runtime -} // namespace tvm diff --git a/src/runtime/contrib/acl/acl_allocator.cc b/src/runtime/contrib/arm_compute_lib/acl_allocator.cc similarity index 64% rename from src/runtime/contrib/acl/acl_allocator.cc rename to src/runtime/contrib/arm_compute_lib/acl_allocator.cc index b72ec9552130c..18372dcde1003 100644 --- a/src/runtime/contrib/acl/acl_allocator.cc +++ b/src/runtime/contrib/arm_compute_lib/acl_allocator.cc @@ -18,7 +18,7 @@ */ /*! - * \file src/runtime/contrib/acl/acl_allocator.cc + * \file src/runtime/contrib/arm_compute_lib/acl_allocator.cc * \brief ACL Allocator implementation that requests memory from TVM. 
*/ @@ -27,7 +27,7 @@ namespace tvm { namespace runtime { namespace contrib { -namespace acl { +namespace arm_compute_lib { void* ACLAllocator::allocate(size_t size, size_t alignment) { CHECK_GT(size, 0) << "Cannot allocate size less than or equal to zero"; @@ -36,38 +36,42 @@ void* ACLAllocator::allocate(size_t size, size_t alignment) { void ACLAllocator::free(void* ptr) { this->device_api_->FreeWorkspace(this->ctx_, ptr); } -std::unique_ptr ACLAllocator::make_region(size_t size, size_t alignment) { - return acl::support::cpp14::make_unique(size, alignment); +std::unique_ptr ACLAllocator::make_region(size_t size, + size_t alignment) { + return arm_compute::support::cpp14::make_unique(size, alignment); } -ACLMemoryRegion::ACLMemoryRegion(size_t size, size_t alignment) : IMemoryRegion(size) { - CHECK_GT(size, 0) << "Cannot allocate size less than or equal to zero"; - this->ptr_ = this->device_api_->AllocDataSpace(this->ctx_, size, alignment, {}); +ACLMemoryRegion::ACLMemoryRegion(size_t size, size_t alignment) + : IMemoryRegion(size), ptr_(nullptr) { + if (size != 0) { + this->ptr_ = this->device_api_->AllocDataSpace(this->ctx_, size, alignment, {}); + } } ACLMemoryRegion::ACLMemoryRegion(void* ptr, size_t size) - : IMemoryRegion(size), is_subregion_(true) { + : IMemoryRegion(size), ptr_(nullptr), is_subregion_(true) { if (size != 0) { this->ptr_ = ptr; } } ACLMemoryRegion::~ACLMemoryRegion() { - if (!is_subregion_) { + if (this->ptr_ != nullptr && !is_subregion_) { this->device_api_->FreeDataSpace(this->ctx_, this->ptr_); } } -std::unique_ptr ACLMemoryRegion::extract_subregion(size_t offset, size_t size) { +std::unique_ptr ACLMemoryRegion::extract_subregion(size_t offset, + size_t size) { if (this->ptr_ != nullptr && (offset < _size) && (_size - offset >= size)) { - return acl::support::cpp14::make_unique( + return arm_compute::support::cpp14::make_unique( static_cast(this->ptr_) + offset, size); } else { return nullptr; } } -} // namespace acl +} // namespace arm_compute_lib } // namespace contrib } // namespace runtime } // namespace tvm diff --git a/src/runtime/contrib/acl/acl_allocator.h b/src/runtime/contrib/arm_compute_lib/acl_allocator.h similarity index 88% rename from src/runtime/contrib/acl/acl_allocator.h rename to src/runtime/contrib/arm_compute_lib/acl_allocator.h index d608645947891..407092894a443 100644 --- a/src/runtime/contrib/acl/acl_allocator.h +++ b/src/runtime/contrib/arm_compute_lib/acl_allocator.h @@ -18,12 +18,12 @@ */ /*! - * \file src/runtime/contrib/acl/acl_allocator.h + * \file src/runtime/contrib/arm_compute_lib/acl_allocator.h * \brief ACL Allocator implementation that requests memory from TVM. */ -#ifndef TVM_RUNTIME_CONTRIB_ACL_ACL_ALLOCATOR_H_ -#define TVM_RUNTIME_CONTRIB_ACL_ACL_ALLOCATOR_H_ +#ifndef TVM_RUNTIME_CONTRIB_ARM_COMPUTE_LIB_ACL_ALLOCATOR_H_ +#define TVM_RUNTIME_CONTRIB_ARM_COMPUTE_LIB_ACL_ALLOCATOR_H_ #include #include @@ -38,9 +38,7 @@ namespace tvm { namespace runtime { namespace contrib { -namespace acl { - -namespace acl = arm_compute; +namespace arm_compute_lib { /*! * \brief Override ACL memory allocator and replace with TVM workspace based allocation. @@ -74,7 +72,7 @@ class ACLAllocator : public arm_compute::IAllocator { * \param alignment Alignment of the memory region. * \return The memory region object. */ - std::unique_ptr make_region(size_t size, size_t alignment) override; + std::unique_ptr make_region(size_t size, size_t alignment) override; private: /*! \brief Always allocate data in the context of the current CPU. 
*/ @@ -120,7 +118,8 @@ class ACLMemoryRegion : public arm_compute::IMemoryRegion { * \return A wrapped memory sub-region with no ownership of the * underlying memory. */ - std::unique_ptr extract_subregion(size_t offset, size_t size) override; + std::unique_ptr extract_subregion(size_t offset, + size_t size) override; private: /*! \brief Points to a region of memory allocated by TVM. */ @@ -133,9 +132,9 @@ class ACLMemoryRegion : public arm_compute::IMemoryRegion { runtime::DeviceAPI* device_api_ = runtime::DeviceAPI::Get(ctx_); }; -} // namespace acl +} // namespace arm_compute_lib } // namespace contrib } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_CONTRIB_ACL_ACL_ALLOCATOR_H_ +#endif // TVM_RUNTIME_CONTRIB_ARM_COMPUTE_LIB_ACL_ALLOCATOR_H_ diff --git a/src/runtime/contrib/arm_compute_lib/acl_runtime.cc b/src/runtime/contrib/arm_compute_lib/acl_runtime.cc new file mode 100644 index 0000000000000..b419af84f2aba --- /dev/null +++ b/src/runtime/contrib/arm_compute_lib/acl_runtime.cc @@ -0,0 +1,399 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/arm_compute_lib/acl_runtime.cc + * \brief A simple JSON runtime for Arm Compute Library. + */ + +#include +#include + +#include "../../file_util.h" +#include "../json/json_node.h" +#include "../json/json_runtime.h" + +#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB +#include +#include +#include +#include + +#include "acl_allocator.h" +#include "acl_utils.h" +#endif + +namespace tvm { +namespace runtime { +namespace contrib { + +using namespace tvm::runtime::json; + +#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB +using namespace arm_compute_lib; + +/*! + * \brief ACL objects we cache in order to avoid needing to construct + * a new layer each time. + */ +struct CachedLayer { + std::shared_ptr function; + std::vector inputs; + std::vector const_inputs; + std::vector outputs; +}; +#endif + +class ACLRuntime : public JSONRuntimeBase { + public: + /*! + * \brief The ACL runtime module. Deserialize the provided functions + * on creation and store in the layer cache. + * + * \param symbol_name The name of the function. + * \param graph_json serialized JSON representation of a sub-graph. + * \param const_names The names of each constant in the sub-graph. + * \params consts An array of constants pre-transposed to the correct layout expected by ACL. + */ + explicit ACLRuntime(const std::string& symbol_name, const std::string& graph_json, + const Array& const_names, const Array& consts) + : JSONRuntimeBase(symbol_name, graph_json, const_names) { + this->constants_ = consts; + } + + /*! + * \brief Get a packed function. + * + * \param name The name/symbol of the function. + * \param sptr_to_self The pointer to the module node. + * \return The packed function. 
+ */ + PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) override { + if (name == "get_symbol") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->symbol_name_; }); + } else if (name == "get_const_vars") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->const_names_; }); + } else if (this->symbol_name_ == name) { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + CHECK(this->initialized_) << "The module has not been initialized"; + + // Bind argument tensors to data entries. + this->SetInputOutputBuffers(args); + // Execute the subgraph. + this->Run(); + }); + } else if ("__init_" + this->symbol_name_ == name) { + // The function to initialize constant tensors. + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + this->Init(); + this->initialized_ = true; + *rv = 0; + }); + } else { + return PackedFunc(nullptr); + } + } + + /*! + * \brief Save a compiled network to a binary stream, which can then be + * serialized to disk. + * + * \param stream The stream to save the binary. + */ + void SaveToBinary(dmlc::Stream* stream) override { + // Save the symbol + stream->Write(symbol_name_); + // Save the graph + stream->Write(graph_json_); + // Save the required const names + std::vector const_names; + for (const auto& it : const_names_) { + const_names.push_back(it); + } + stream->Write(const_names); + // Save the required constant data + stream->Write(constants_.size()); + for (const auto& it : constants_) { + it.Save(stream); + } + } + + /*! + * \brief Load a compiled network from stream. + * + * \param strm The binary stream to load. + * \return The created ACL module. + */ + static Module LoadFromBinary(void* strm) { + dmlc::Stream* stream = static_cast(strm); + std::string symbol; + std::string graph_json; + std::vector consts; + // Load the symbol + CHECK(stream->Read(&symbol)) << "Loading symbol name failed"; + CHECK(stream->Read(&graph_json)) << "Loading graph json failed"; + CHECK(stream->Read(&consts)) << "Loading the const name list failed"; + Array const_names; + for (const auto& it : consts) { + const_names.push_back(it); + } + size_t const_data_count; + CHECK(stream->Read(&const_data_count)); + Array const_data; + for (size_t i = 0; i < const_data_count; ++i) { + runtime::NDArray temp; + CHECK(temp.Load(stream)) << "Failed to load constant"; + const_data.push_back(temp); + } + auto n = make_object(symbol, graph_json, const_names, const_data); + return Module(n); + } + + /*! + * \brief The type key of the module. + * + * \return module type key. + */ + const char* type_key() const override { return "arm_compute_lib"; } + + /*! + * \brief Initialize runtime. Create ACL layer from JSON + * representation. + */ + void Init() { + CHECK_EQ(this->constants_.size(), const_idx_.size()) + << "The number of input constants must match the number expected."; + this->SetupConstants(this->constants_); +#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB + BuildEngine(); +#endif + } + + // Do not accept constants from MetadataModule as they should be transposed + // by the ACL codegen so they have the correct expected layout. + void Init(const Array& constants) override { LOG(FATAL) << "Not implemented."; } + + /*! + * \brief Unpack inputs and outputs and run inference on a given layer. + * + * \param args Access inputs and outputs. + * \param function The layer to execute inference on. + * \return Status of inference. 
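The SaveToBinary/LoadFromBinary pair above is what lets a compiled Arm Compute Library module survive export: the graph JSON, the constant names and the pre-transposed constant data are written into the exported artifact and restored when the library is loaded on the target. Below is a minimal sketch of that round trip, assuming illustrative shapes, the file name offloaded.so and an aarch64-linux-gnu-g++ cross-compiler; it mirrors the flow used by the test infrastructure rather than documenting a fixed API.

# Minimal sketch: export and reload a module with ACL offloading.
# Shapes, file name and cross-compiler are assumptions for illustration.
import tvm
from tvm import relay
from tvm.contrib import graph_runtime
from tvm.relay.op.contrib import arm_compute_lib

data = relay.var("data", shape=(1, 14, 14, 32), dtype="float32")
pool = relay.nn.max_pool2d(data, pool_size=(2, 2), strides=(2, 2), layout="NHWC")
mod = arm_compute_lib.partition_for_arm_compute_lib(tvm.IRModule.from_expr(pool), None)

target = "llvm -mtriple=aarch64-linux-gnu -mattr=+neon"
with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
    graph, lib, params = relay.build(mod, target=target)

# SaveToBinary runs while the imported ACL modules are packed into the library;
# LoadFromBinary runs when the library is loaded again.
lib.export_library("offloaded.so", cc="aarch64-linux-gnu-g++")

# On the AArch64 device:
loaded = tvm.runtime.load_module("offloaded.so")
rt = graph_runtime.create(graph, loaded, tvm.cpu(0))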
+ */ + void Run() override { +#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB + for (size_t i = 0; i < input_nodes_.size(); ++i) { + auto nid = input_nodes_[i]; + uint32_t eid = EntryID(nid, 0); + if (nodes_[nid].GetOpType() == "input") { + void* data = data_entry_[eid]->data; + CheckACLError(layer_.inputs[i].allocator()->import_memory(data)); + } + } + + for (size_t i = 0; i < outputs_.size(); ++i) { + uint32_t eid = EntryID(outputs_[i]); + void* data = data_entry_[eid]->data; + CheckACLError(layer_.outputs[i].allocator()->import_memory(data)); + } + + this->layer_.function->run(); +#else + LOG(FATAL) << "Cannot call run on Arm Compute Library module without runtime enabled. " + << "Please build with USE_ACL_GRAPH_RUNTIME."; +#endif + } + + /*! + * \brief Get the JSON generated by codegen. + * + * \param format the format to return (only JSON for the time being) + * \return A string of JSON. + */ + std::string GetSource(const std::string& format) override { + if (format == "json") { + return graph_json_; + } + LOG(FATAL) << "Format not supported by Arm Compute Library runtime."; + return ""; + } + + private: +#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB + /*! + * \brief Build ACL layer from JSON representation and cache. + * + * \note For the time being only one layer or operator is supported + * per engine. + */ + void BuildEngine() { + std::shared_ptr mm = MakeMemoryManager(); + int num_pools = 0; + + for (size_t i = 0; i < input_nodes_.size(); ++i) { + uint32_t nid = input_nodes_[i]; + const auto& node = nodes_[nid]; + if (node.GetOpType() == "input") { + layer_.inputs.push_back(MakeTensor(node)); + } else if (node.GetOpType() == "const") { + uint32_t eid = EntryID(nid, 0); + void* data = data_entry_[eid]->data; + layer_.const_inputs.push_back(MakeTensor(node, data)); + } + } + + for (size_t nid = 0; nid < nodes_.size(); ++nid) { + const auto& node = nodes_[nid]; + if (node.GetOpType() == "kernel") { + auto op_name = node.GetOpName(); + if ("nn.conv2d" == op_name || "arm_compute_lib.conv2d" == op_name) { + CreateConvolution2DLayer(&layer_, node, mm); + num_pools++; + } else if ("nn.max_pool2d" == op_name) { + CreatePoolingLayer(&layer_, node); + } else if ("reshape" == op_name) { + CreateReshapeLayer(&layer_, node); + } else { + LOG(FATAL) << "Unsupported op: " << op_name; + } + // Only expect one op for the time being + break; + } + } + + this->layer_.function->prepare(); + if (num_pools > 0) mm->populate(this->allocator_, num_pools); + } + + /*! + * \brief Create a 2D convolution layer. + * + * \param layer The ACL layer to build. Containing inputs, outputs and the ACL function. + * \param node The JSON representation of the operator. + * \param mm The ACL conv2d layer can request auxiliary memory from TVM. 
+ */ + static void CreateConvolution2DLayer( + CachedLayer* layer, const JSONGraphNode& node, + const std::shared_ptr& mm) { + std::vector padding = node.GetAttr>("padding"); + std::vector strides = node.GetAttr>("strides"); + std::vector dilation = node.GetAttr>("dilation"); + arm_compute::PadStrideInfo pad_stride_info = ToACLPadStride(padding, strides); + + int groups = std::stoi(node.GetAttr>("groups")[0]); + CHECK(groups == 1) << "Arm Compute Library NEON convolution only supports group size of 1."; + + arm_compute::ActivationLayerInfo act_info; + if (node.HasAttr("activation_type")) { + std::string activation_type = node.GetAttr>("activation_type")[0]; + if (activation_type == "relu") { + act_info = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::RELU); + } else { + LOG(FATAL) << "Unsupported activation function"; + } + } + + arm_compute::Size2D dilation_2d(std::stoi(dilation[0]), std::stoi(dilation[1])); + + layer->outputs.push_back(MakeOutputTensor(node.GetOpShape()[0])); + + auto function = std::make_shared(mm); + function->configure(&layer->inputs[0], &layer->const_inputs[0], + layer->const_inputs.size() > 1 ? &layer->const_inputs[1] : nullptr, + &layer->outputs[0], pad_stride_info, arm_compute::WeightsInfo(), + dilation_2d, act_info); + layer->function = function; + } + + /*! + * \brief Create a pooling layer. + * + * \note Currently only maxpool is supported. + * + * \param layer The ACL layer to build. Containing inputs, outputs and the ACL function. + * \param node The JSON representation of the operator. + */ + static void CreatePoolingLayer(CachedLayer* layer, const JSONGraphNode& node) { + std::vector padding = node.GetAttr>("padding"); + std::vector strides = node.GetAttr>("strides"); + arm_compute::PadStrideInfo pad_stride_info = ToACLPadStride(padding, strides); + + auto attr_pool_size = node.GetAttr>("pool_size"); + int pool_size_h = std::stoi(attr_pool_size[0]); + int pool_size_w = std::stoi(attr_pool_size[1]); + + arm_compute::PoolingType pool_type; + if (node.GetOpName() == "nn.max_pool2d") { + pool_type = arm_compute::PoolingType::MAX; + } else { + LOG(FATAL) << "Pooling type not supported"; + } + + arm_compute::PoolingLayerInfo pool_info = + arm_compute::PoolingLayerInfo(pool_type, arm_compute::Size2D(pool_size_h, pool_size_w), + arm_compute::DataLayout::NHWC, pad_stride_info); + + layer->outputs.push_back(MakeOutputTensor(node.GetOpShape()[0])); + + auto function = std::make_shared(); + function->configure(&layer->inputs[0], &layer->outputs[0], pool_info); + layer->function = function; + } + + /*! + * \brief Create a reshape layer. + * + * \param layer The ACL layer to build. Containing inputs, outputs and the ACL function. + * \param node The JSON representation of the operator. + */ + static void CreateReshapeLayer(CachedLayer* layer, const JSONGraphNode& node) { + layer->outputs.push_back(MakeOutputTensor(node.GetOpShape()[0])); + auto function = std::make_shared(); + function->configure(&layer->inputs[0], &layer->outputs[0]); + layer->function = function; + } + + /*! \brief Allow ACL functions to request auxiliary memory from TVM. */ + arm_compute_lib::ACLAllocator allocator_; + /*! \brief The network layers represented by acl functions. Note: currently only supports a single + * layer. */ + CachedLayer layer_; +#endif + + /*! \brief Array of pre-transposed constants from ACL codegen. 
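BuildEngine above expects exactly one kernel node per offloaded function and maps it onto a single ACL function: a convolution with optional bias and ReLU, an nn.max_pool2d, or a reshape. The rough Relay sketch below builds a graph only from those operators; the shapes are illustrative, and the conv2d/bias_add/relu chain is the pattern the codegen is expected to fuse into one arm_compute_lib.conv2d function (matching the op name handled above), while the pooling and reshape each become their own single-operator function.

# Illustrative graph using only operators handled by BuildEngine above;
# shapes and variable names are assumptions for the sketch.
import tvm
from tvm import relay

data = relay.var("data", shape=(1, 14, 14, 32), dtype="float32")
weight = relay.var("weight", shape=(32, 3, 3, 32), dtype="float32")  # OHWI layout
bias = relay.var("bias", shape=(32,), dtype="float32")

conv = relay.nn.conv2d(data, weight, kernel_size=(3, 3), padding=(1, 1), channels=32,
                       data_layout="NHWC", kernel_layout="OHWI")
act = relay.nn.relu(relay.nn.bias_add(conv, bias, axis=3))  # candidate for fusion into conv2d
pooled = relay.nn.max_pool2d(act, pool_size=(2, 2), strides=(2, 2), layout="NHWC")
out = relay.reshape(pooled, newshape=(1, -1))
mod = tvm.IRModule.from_expr(out)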
*/ + Array constants_; +}; + +runtime::Module ACLRuntimeCreate(const String& symbol_name, const String& graph_json, + const Array& const_names, const Array& consts) { + auto n = make_object(symbol_name, graph_json, const_names, consts); + return runtime::Module(n); +} + +TVM_REGISTER_GLOBAL("runtime.arm_compute_lib_runtime_create").set_body_typed(ACLRuntimeCreate); + +TVM_REGISTER_GLOBAL("runtime.module.loadbinary_arm_compute_lib") + .set_body_typed(ACLRuntime::LoadFromBinary); + +} // namespace contrib +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/contrib/arm_compute_lib/acl_utils.cc b/src/runtime/contrib/arm_compute_lib/acl_utils.cc new file mode 100644 index 0000000000000..a508181473e13 --- /dev/null +++ b/src/runtime/contrib/arm_compute_lib/acl_utils.cc @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/arm_compute_lib/acl_utils.cc + * \brief Utils and common functions for the interface. + */ + +#include "acl_utils.h" + +#include +#include +#include + +namespace tvm { +namespace runtime { +namespace contrib { +namespace arm_compute_lib { + +using JSONGraphNode = tvm::runtime::json::JSONGraphNode; + +void CheckACLError(const arm_compute::Status& status) { + CHECK(status.error_code() == arm_compute::ErrorCode::OK) << "ACL: " << status.error_description(); +} + +arm_compute::Tensor MakeTensor(const JSONGraphNode& tensor_rep, void* data) { + CHECK(tensor_rep.GetOpType() == "input" || tensor_rep.GetOpType() == "const"); + arm_compute::Tensor tensor; + arm_compute::TensorInfo info = MakeTensorInfo(tensor_rep.GetOpShape()[0]); + tensor.allocator()->init(info); + if (data != nullptr) { + CheckACLError(tensor.allocator()->import_memory(data)); + } + return tensor; +} + +arm_compute::Tensor MakeOutputTensor(const std::vector& shape) { + arm_compute::Tensor tensor; + tensor.allocator()->init(MakeTensorInfo(shape)); + return tensor; +} + +arm_compute::TensorInfo MakeTensorInfo(const std::vector& shape) { + arm_compute::TensorShape acl_shape = MakeTensorShape(shape); + return arm_compute::TensorInfo(acl_shape, 1, arm_compute::DataType::F32, + arm_compute::DataLayout::NHWC); +} + +arm_compute::TensorShape MakeTensorShape(const std::vector& shape) { + arm_compute::TensorShape acl_shape; + for (unsigned int i = shape.size(); i > 0; --i) { + acl_shape.set(shape.size() - i, shape[i - 1]); + } + return acl_shape; +} + +std::shared_ptr MakeMemoryManager() { + auto lifetime_mgr = std::make_shared(); + auto pool_mgr = std::make_shared(); + return std::make_shared(lifetime_mgr, pool_mgr); +} + +arm_compute::PadStrideInfo ToACLPadStride(const std::vector& pad, + const std::vector& stride) { + int pad_0, pad_1, pad_2, pad_3; + + size_t size = pad.size(); + if (size == 1) { + int 
pad_v = std::stoi(pad[0]); + pad_0 = pad_v; + pad_1 = pad_v; + pad_2 = pad_v; + pad_3 = pad_v; + } else if (size == 2) { + // TVM: height, width -> ACL: left, right, top, bottom + int pad_h = std::stoi(pad[0]); + int pad_w = std::stoi(pad[1]); + pad_0 = pad_w; + pad_1 = pad_w; + pad_2 = pad_h; + pad_3 = pad_h; + } else if (size == 4) { + // TVM: top, left, bottom, right -> ACL: left, right, top, bottom + pad_0 = std::stoi(pad[1]); + pad_1 = std::stoi(pad[3]); + pad_2 = std::stoi(pad[0]); + pad_3 = std::stoi(pad[2]); + } else { + LOG(FATAL) << "Unsupported padding dimensions"; + return arm_compute::PadStrideInfo(); + } + + return arm_compute::PadStrideInfo(std::stoi(stride[0]), std::stoi(stride[1]), pad_0, pad_1, pad_2, + pad_3, arm_compute::DimensionRoundingType::FLOOR); +} + +} // namespace arm_compute_lib +} // namespace contrib +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/contrib/acl/acl_utils.h b/src/runtime/contrib/arm_compute_lib/acl_utils.h similarity index 52% rename from src/runtime/contrib/acl/acl_utils.h rename to src/runtime/contrib/arm_compute_lib/acl_utils.h index 111121d48308e..41de7b58302ff 100644 --- a/src/runtime/contrib/acl/acl_utils.h +++ b/src/runtime/contrib/arm_compute_lib/acl_utils.h @@ -18,39 +18,41 @@ */ /*! - * \file src/runtime/contrib/acl/acl_utils.h + * \file src/runtime/contrib/arm_compute_lib/acl_utils.h * \brief Utils and common functions for the interface. */ -#ifndef TVM_RUNTIME_CONTRIB_ACL_ACL_UTILS_H_ -#define TVM_RUNTIME_CONTRIB_ACL_ACL_UTILS_H_ +#ifndef TVM_RUNTIME_CONTRIB_ARM_COMPUTE_LIB_ACL_UTILS_H_ +#define TVM_RUNTIME_CONTRIB_ARM_COMPUTE_LIB_ACL_UTILS_H_ #include +#include #include +#include +#include #include -#include "../../../relay/backend/contrib/acl/acl_api.h" +#include "../json/json_node.h" namespace tvm { namespace runtime { namespace contrib { -namespace acl { +namespace arm_compute_lib { -namespace api = relay::contrib::acl; -namespace acl = arm_compute; +using JSONGraphNode = tvm::runtime::json::JSONGraphNode; /*! * \brief Check if there are any errors from acl and forward them to TVM. * - * \param status status of called function. - * * Status values: * - 0 => OK * - 1 => RUNTIME_ERROR * - 2 => UNSUPPORTED_EXTENSION_USE + * + * \param status status of called function. */ -void CheckACLError(acl::Status status); +void CheckACLError(const arm_compute::Status& status); /*! * \brief Make an acl tensor from JSON tensor representation. @@ -59,29 +61,55 @@ void CheckACLError(acl::Status status); * \param data (optional) Initialize the tensor with memory. * \return arm_compute::Tensor. */ -acl::Tensor MakeTensor(const api::JSONTensor& tensor_rep, void* data = nullptr); +arm_compute::Tensor MakeTensor(const JSONGraphNode& tensor_rep, void* data = nullptr); + +/*! + * \brief Make an acl tensor from type and shape, without having a JSON representation. + * + * \param shape The shape of the tensor to create. + * \return arm_compute::Tensor. + */ +arm_compute::Tensor MakeOutputTensor(const std::vector& shape); /*! * \brief Make an acl tensor info object from JSON tensor * representation. * - * \param tensor_rep A JSON tensor representation. + * \param shape The shape of the tensor to create. * \return arm_compute::TensorInfo. */ -acl::TensorInfo MakeTensorInfo(const api::JSONTensor& tensor_rep); +arm_compute::TensorInfo MakeTensorInfo(const std::vector& shape); /*! * \brief Convert vector object to acl TensorShape. * \note This requires reversing the given vector. * * \param shape The shape of the tensor as a vector. 
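ToACLPadStride above reorders TVM padding into the (left, right, top, bottom) order expected by arm_compute::PadStrideInfo, handling the 1-, 2- and 4-element forms. The small Python mirror below only illustrates that reordering; it is not part of the change.

# Illustrative mirror of the padding reordering in ToACLPadStride above.
# TVM 4-element padding is (top, left, bottom, right);
# ACL expects (left, right, top, bottom).
def to_acl_padding(pad):
    if len(pad) == 1:
        p = int(pad[0])
        return (p, p, p, p)
    if len(pad) == 2:  # (height, width)
        pad_h, pad_w = int(pad[0]), int(pad[1])
        return (pad_w, pad_w, pad_h, pad_h)
    if len(pad) == 4:  # (top, left, bottom, right)
        top, left, bottom, right = (int(p) for p in pad)
        return (left, right, top, bottom)
    raise ValueError("Unsupported padding dimensions")

assert to_acl_padding(["1", "2"]) == (2, 2, 1, 1)
assert to_acl_padding(["1", "2", "3", "4"]) == (2, 4, 1, 3)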
- * \return acl TensorShape. + * \return arm_compute::TensorShape. + */ +arm_compute::TensorShape MakeTensorShape(const std::vector& shape); + +/*! + * \brief Create a memory manager for use with a layer that + * requires working memory. + * + * \return reference counted memory manager. + */ +std::shared_ptr MakeMemoryManager(); + +/* + * \brief Convert TVM padding and stride format to acl PadStrideInfo. + * + * \param pad The pad vector. + * \param stride The stride vector. + * \return arm_compute::PadStrideInfo */ -acl::TensorShape MakeTensorShape(const std::vector& shape); +arm_compute::PadStrideInfo ToACLPadStride(const std::vector& pad, + const std::vector& stride); -} // namespace acl +} // namespace arm_compute_lib } // namespace contrib } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_CONTRIB_ACL_ACL_UTILS_H_ +#endif // TVM_RUNTIME_CONTRIB_ARM_COMPUTE_LIB_ACL_UTILS_H_ diff --git a/src/runtime/contrib/json/json_node.h b/src/runtime/contrib/json/json_node.h index 7468feb21cb13..7cb17de9db7c8 100644 --- a/src/runtime/contrib/json/json_node.h +++ b/src/runtime/contrib/json/json_node.h @@ -272,6 +272,15 @@ class JSONGraphNode { attrs_[key] = value; } + /*! + * \brief Check if node has attribute. + * + * \param key The key of the attribute. + * + * \return True if attribute exists, false otherwise. + */ + bool HasAttr(const std::string& key) const { return attrs_.find(key) != attrs_.end(); } + virtual ~JSONGraphNode() {} private: diff --git a/tests/python/contrib/test_acl/__init__.py b/tests/python/contrib/test_arm_compute_lib/__init__.py similarity index 93% rename from tests/python/contrib/test_acl/__init__.py rename to tests/python/contrib/test_arm_compute_lib/__init__.py index a8671172febde..fd14be1cc34dd 100644 --- a/tests/python/contrib/test_acl/__init__.py +++ b/tests/python/contrib/test_arm_compute_lib/__init__.py @@ -14,4 +14,4 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Infrastructure and tests for ACL""" +"""Infrastructure and tests for Arm Compute Library""" diff --git a/tests/python/contrib/test_acl/infrastructure.py b/tests/python/contrib/test_arm_compute_lib/infrastructure.py similarity index 76% rename from tests/python/contrib/test_acl/infrastructure.py rename to tests/python/contrib/test_arm_compute_lib/infrastructure.py index 04c5d2784c28a..a82e980e88fe8 100644 --- a/tests/python/contrib/test_acl/infrastructure.py +++ b/tests/python/contrib/test_arm_compute_lib/infrastructure.py @@ -21,14 +21,14 @@ from tvm import relay from tvm import rpc from tvm.contrib import graph_runtime -from tvm.relay.op.contrib import acl +from tvm.relay.op.contrib import arm_compute_lib from tvm.contrib import util class Device: """Adjust the following settings to connect to and use a remote device for tests.""" use_remote = False - target = "llvm -target=aarch64-linux-gnu -mattr=+neon" + target = "llvm -mtriple=aarch64-linux-gnu -mattr=+neon" # Enable cross compilation when connecting a remote device from a non-arm platform. cross_compile = None # cross_compile = "aarch64-linux-gnu-g++" @@ -69,20 +69,20 @@ def _get_remote(cls): def skip_runtime_test(): """Skip test if it requires the runtime and it's not present.""" # ACL codegen not present. 
- if not tvm.get_global_func("relay.ext.acl", True): - print("Skip because ACL codegen is not available.") + if not tvm.get_global_func("relay.ext.arm_compute_lib", True): + print("Skip because Arm Compute Library codegen is not available.") return True # Remote device is in use or ACL runtime not present - if not Device.use_remote and not acl.is_acl_runtime_present(): + if not Device.use_remote and not arm_compute_lib.is_arm_compute_runtime_present(): print("Skip because runtime isn't present or a remote device isn't being used.") return True def skip_codegen_test(): """Skip test if it requires the ACL codegen and it's not present.""" - if not tvm.get_global_func("relay.ext.acl", True): - print("Skip because ACL codegen is not available.") + if not tvm.get_global_func("relay.ext.arm_compute_lib", True): + print("Skip because Arm Compute Library codegen is not available.") return True @@ -90,9 +90,10 @@ def build_module(mod, target, params=None, enable_acl=True): """Build module with option to build for ACL.""" if isinstance(mod, tvm.relay.expr.Call): mod = tvm.IRModule.from_expr(mod) - with tvm.transform.PassContext(opt_level=3): + with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]): if enable_acl: - mod = acl.partition_for_acl(mod, params) + mod = arm_compute_lib.partition_for_arm_compute_lib(mod, params) + relay.backend.compile_engine.get().clear() return relay.build(mod, target=target, params=params) @@ -136,27 +137,31 @@ def verify(answers, atol, rtol): def extract_acl_modules(module): """Get the ACL module(s) from llvm module.""" - return list(filter(lambda mod: mod.type_key == "acl", + return list(filter(lambda mod: mod.type_key == "arm_compute_lib", module.imported_modules)) def verify_codegen(module, known_good_codegen, num_acl_modules, - target="llvm -target=aarch64-linux-gnu -mattr=+neon"): + target="llvm -mtriple=aarch64-linux-gnu -mattr=+neon"): """Check acl codegen against a known good output.""" _, module, _ = build_module(module, target) acl_modules = extract_acl_modules(module) assert len(acl_modules) == num_acl_modules, \ - f"The number of ACL modules produced ({len(acl_modules)}) does not " \ + f"The number of Arm Compute Library modules produced ({len(acl_modules)}) does not " \ f"match the expected value ({num_acl_modules})." for mod in acl_modules: - source = mod.get_source() - source_json = json.loads(source) - func_name = list(source_json.keys())[0] - codegen = source_json[func_name]["node"] - - assert codegen == known_good_codegen, \ + source = mod.get_source("json") + codegen = json.loads(source)["nodes"] + # remove input and const names as these cannot be predetermined + for node in range(len(codegen)): + if codegen[node]["op"] == "input" or codegen[node]["op"] == "const": + codegen[node]["name"] = "" + codegen_str = json.dumps(codegen, sort_keys=True, indent=2) + known_good_codegen_str = json.dumps(known_good_codegen, sort_keys=True, indent=2) + + assert codegen_str == known_good_codegen_str, \ f"The JSON produced by codegen does not match the expected result. 
\n" \ - f"Actual={json.dumps(codegen, sort_keys=True, indent=2)} \n" \ - f"Expected={json.dumps(known_good_codegen, sort_keys=True, indent=2)}" + f"Actual={codegen_str} \n" \ + f"Expected={known_good_codegen_str}" diff --git a/tests/python/contrib/test_acl/test_conv2d.py b/tests/python/contrib/test_arm_compute_lib/test_conv2d.py similarity index 79% rename from tests/python/contrib/test_acl/test_conv2d.py rename to tests/python/contrib/test_arm_compute_lib/test_conv2d.py index a2724315c4e8e..f09589e54f3b9 100644 --- a/tests/python/contrib/test_acl/test_conv2d.py +++ b/tests/python/contrib/test_arm_compute_lib/test_conv2d.py @@ -69,43 +69,56 @@ def _get_model(shape, kernel_size, padding, strides, def _get_expected_codegen(shape, kernel_size, padding, strides, dilation, groups, dtype, channels, has_bias=False, has_activation=False): - codegen = { - "name": "conv2d", - "inputs": [], - "outputs": [], - "attrs": { - "groups": ["Int", 1], - "num_inputs": ["Size_t", 2], - "num_outputs": ["Size_t", 1] - } - } - if len(padding) == 2: padding = (padding[0], padding[1], padding[0], padding[1]) - # Transpose padding to match ACL format - padding = (padding[1], padding[3], padding[0], padding[2]) weight_shape = (channels, kernel_size, kernel_size, shape[3] // groups) - output_height = ((shape[1] - kernel_size + padding[2] + padding[3]) / strides[0]) + 1 - output_width = ((shape[2] - kernel_size + padding[0] + padding[1]) / strides[1]) + 1 + output_height = ((shape[1] - kernel_size + padding[0] + padding[2]) / strides[0]) + 1 + output_width = ((shape[2] - kernel_size + padding[1] + padding[3]) / strides[1]) + 1 output_shape = (1, int(output_height), int(output_width), channels) - codegen["attrs"]["padding"] = ["IntVector", list(padding)] - codegen["attrs"]["strides"] = ["IntVector", list(strides)] - if has_activation: - codegen["attrs"]["activation_type"] = ["String", "relu"] + node = { + "op": "kernel", + "name": "arm_compute_lib.conv2d", + "inputs": [[0, 0, 0], [1, 0, 0]], + "attrs": { + "groups": [["1"]], + "num_inputs": str(3 if has_bias else 2), + "num_outputs": "1", + "data_layout": [["NHWC"]], + "kernel_layout": [["OHWI"]], + "channels": [["1"]], + "dilation": [["1", "1"]], + "out_layout": [[""]], + "out_dtype": [[""]], + "kernel_size": [[str(kernel_size), str(kernel_size)]], + "shape": [[list(output_shape)]], + "dtype": [[dtype]], + "padding": [[str(p) for p in padding]], + "strides": [[str(s) for s in strides]] + }, + } - inputs = [{"type": "var", "shape": list(shape)}, - {"type": "const", "shape": list(weight_shape)}] - if has_bias: - inputs.append({"type": "const", "shape": [weight_shape[0]]}) - outputs = [{"type": "var", "shape": list(output_shape)}] + if has_activation: + node["attrs"]["activation_type"] = [["relu"]] - codegen["inputs"] = inputs - codegen["outputs"] = outputs - codegen["attrs"]["num_inputs"] = ["Size_t", len(inputs)] - codegen["attrs"]["num_outputs"] = ["Size_t", len(outputs)] + input = { + "op": "input", + "name": "", + "attrs": {"shape": [[list(shape)]], "dtype": [["float32"]]}} + kernel = { + "op": "const", + "name": "", + "attrs": {"shape": [[list(weight_shape)]], "dtype": [["float32"]]}} - return codegen + if has_bias: + bias = { + "op": "const", + "name": "", + "attrs": {"shape": [[[weight_shape[0]]]], "dtype": [["float32"]]}} + node["inputs"].append([2, 0, 0]) + return [input, kernel, bias, node] + else: + return [input, kernel, node] def test_conv2d(): @@ -113,15 +126,16 @@ def test_conv2d(): return device = Device() + np.random.seed(0) - shape = (1, 25, 25, 1) + 
shape = (1, 14, 14, 32) dtype = "float32" inputs = { "a": tvm.nd.array(np.random.uniform(-128, 127, shape).astype(dtype)), } - for kernel_size in [2, 3]: + for kernel_size in [1, 2, 3]: outputs = [] func, params = _get_model(shape, kernel_size, (0, 0), (1, 1), 1, 1, @@ -181,7 +195,6 @@ def test_codegen_conv2d(): func, params = _get_model(*args, var_names=iter(inputs)) exp_codegen = _get_expected_codegen(*args) verify_codegen(func, exp_codegen, 1) - # Test composite convolution: (has_pad, has_bias, has_activation). for composite in [(False, True, False), (False, False, True), (False, True, True), (True, False, False)]: @@ -192,8 +205,7 @@ def test_codegen_conv2d(): has_activation=composite[2]) exp_codegen = _get_expected_codegen(*args, has_bias=composite[1], - has_activation=composite[2], - ) + has_activation=composite[2]) verify_codegen(func, exp_codegen, 1) diff --git a/tests/python/contrib/test_acl/test_network.py b/tests/python/contrib/test_arm_compute_lib/test_network.py similarity index 99% rename from tests/python/contrib/test_acl/test_network.py rename to tests/python/contrib/test_arm_compute_lib/test_network.py index e5afe905228f1..9b8ff0088b5ab 100644 --- a/tests/python/contrib/test_acl/test_network.py +++ b/tests/python/contrib/test_arm_compute_lib/test_network.py @@ -27,6 +27,7 @@ def _build_and_run_keras_network(mod, params, inputs, device): """Helper function to build and run a network from the Keras frontend.""" data = {} + np.random.seed(0) for name, shape in inputs.items(): data[name] = np.random.uniform(-128, 127, shape).astype("float32") diff --git a/tests/python/contrib/test_acl/test_pooling.py b/tests/python/contrib/test_arm_compute_lib/test_pooling.py similarity index 72% rename from tests/python/contrib/test_acl/test_pooling.py rename to tests/python/contrib/test_arm_compute_lib/test_pooling.py index 8fb1e93d6ac07..bb3758ab91c33 100644 --- a/tests/python/contrib/test_acl/test_pooling.py +++ b/tests/python/contrib/test_arm_compute_lib/test_pooling.py @@ -37,42 +37,34 @@ def _get_model(shape, typef, sizes, strides, padding, def _get_expected_codegen(shape, typef, sizes, strides, padding, ceil_mode): - codegen = { - "name": "max_pool", - "inputs": [], - "outputs": [], - "attrs": { - "pooling_type": ["String", "max"] - } - } - if len(padding) == 2: padding = (padding[1], padding[1], padding[0], padding[0]) - # Transpose padding to match ACL format - padding = (padding[1], padding[3], padding[0], padding[2]) - output_height = ((shape[1] - sizes[0] + padding[2] + padding[3]) / strides[0]) + 1 - output_width = ((shape[2] - sizes[1] + padding[0] + padding[1]) / strides[1]) + 1 + output_height = ((shape[1] - sizes[0] + padding[0] + padding[2]) / strides[0]) + 1 + output_width = ((shape[2] - sizes[1] + padding[1] + padding[3]) / strides[1]) + 1 output_shape = (1, int(output_height), int(output_width), shape[3]) - if typef == relay.nn.max_pool2d: - pooling_type = "max" - else: - raise NotImplementedError(f"No conversion from {typef} to pooling_type string.") - - codegen["attrs"]["padding"] = ["IntVector", list(padding)] - codegen["attrs"]["strides"] = ["IntVector", list(strides)] - codegen["attrs"]["pool_size"] = ["IntVector", list(sizes)] - codegen["attrs"]["pooling_type"] = ["String", pooling_type] - - inputs = [{"type": "var", "shape": list(shape)}] - outputs = [{"type": "var", "shape": list(output_shape)}] - - codegen["inputs"] = inputs - codegen["outputs"] = outputs - codegen["attrs"]["num_inputs"] = ["Size_t", len(inputs)] - codegen["attrs"]["num_outputs"] = ["Size_t", 
len(outputs)] + node = { + "op": "kernel", + "name": "nn.max_pool2d", + "inputs": [[0, 0, 0]], + "attrs": { + "num_inputs": "1", + "num_outputs": "1", + "layout": [["NHWC"]], + "shape": [[list(output_shape)]], + "dtype": [["float32"]], + "padding": [[str(p) for p in padding]], + "strides": [[str(s) for s in strides]], + "pool_size": [[str(s) for s in sizes]], + "ceil_mode": [[str(1 if ceil_mode else 0)]] + }, + } - return codegen + input = { + "op": "input", + "name": "", + "attrs": {"shape": [[list(shape)]], "dtype": [["float32"]]}} + return [input, node] def test_pooling(): @@ -80,6 +72,7 @@ def test_pooling(): return device = Device() + np.random.seed(0) for size in [(2, 2), (3, 3)]: for stride in [(2, 2)]: diff --git a/tests/python/contrib/test_acl/test_reshape.py b/tests/python/contrib/test_arm_compute_lib/test_reshape.py similarity index 82% rename from tests/python/contrib/test_acl/test_reshape.py rename to tests/python/contrib/test_arm_compute_lib/test_reshape.py index 81192cdf992c8..0d0656eaa7600 100644 --- a/tests/python/contrib/test_acl/test_reshape.py +++ b/tests/python/contrib/test_arm_compute_lib/test_reshape.py @@ -34,22 +34,26 @@ def _get_model(input_shape, output_shape, var_names): def _get_expected_codegen(input_shape, output_shape): - codegen = { + node = { + "op": "kernel", "name": "reshape", - "inputs": [], - "outputs": [], - "attrs": {} + "inputs": [[0, 0, 0]], + "attrs": { + "num_inputs": "1", + "num_outputs": "1", + "newshape": [[str(s) for s in output_shape]], + "shape": [[list(output_shape)]], + "dtype": [["float32"]], + "reverse": [["0"]] + }, } - inputs = [{"type": "var", "shape": list(input_shape)}] - outputs = [{"type": "var", "shape": list(output_shape)}] + input = { + "op": "input", + "name": "", + "attrs": {"shape": [[list(input_shape)]], "dtype": [["float32"]]}} - codegen["inputs"] = inputs - codegen["outputs"] = outputs - codegen["attrs"]["num_inputs"] = ["Size_t", len(inputs)] - codegen["attrs"]["num_outputs"] = ["Size_t", len(outputs)] - - return codegen + return [input, node] def test_reshape(): @@ -57,6 +61,7 @@ def test_reshape(): return device = Device() + np.random.seed(0) inputs = { "a": tvm.nd.array( diff --git a/tests/python/contrib/test_acl/test_runtime.py b/tests/python/contrib/test_arm_compute_lib/test_runtime.py similarity index 99% rename from tests/python/contrib/test_acl/test_runtime.py rename to tests/python/contrib/test_arm_compute_lib/test_runtime.py index 7b332730e9538..7e4714bafbcb0 100644 --- a/tests/python/contrib/test_acl/test_runtime.py +++ b/tests/python/contrib/test_arm_compute_lib/test_runtime.py @@ -34,6 +34,7 @@ def test_multiple_ops(): return device = Device() + np.random.seed(0) def get_model(input_shape, var_names): """Return a model and any parameters it may have."""