diff --git a/src/relay/backend/aot_executor_codegen.cc b/src/relay/backend/aot_executor_codegen.cc
index 573206c782f6b..b4578c93f501d 100644
--- a/src/relay/backend/aot_executor_codegen.cc
+++ b/src/relay/backend/aot_executor_codegen.cc
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -44,52 +45,179 @@ namespace tvm {
 namespace relay {
 namespace backend {
 
+/**
+ * Struct to contain information about intermediate variables in the
+ * runner function
+ */
+struct StorageInfo {
+  /*! \brief unique integer identifier of the particular intermediate variable */
+  std::vector<int> ids;
+  /*! \brief exact size of the temporary */
+  std::vector<size_t> sizes_bytes;
+  /*! \brief device type of the temporary variable */
+  std::vector<int> dev_types;
+};
+
 using IntegerArray = Array<Integer>;
 using TargetsMap = std::unordered_map<int, Target>;
+using StorageMap =
+    std::unordered_map<Expr, StorageInfo, runtime::ObjectPtrHash, runtime::ObjectPtrEqual>;
 
-class AotReturnSidVisitor : public ExprVisitor {
+/**
+ * This is an on demand allocator for AOT. A new temporary
+ * (storage allocator identifier) is allocated for each operation.
+ */
+class AOTOnDemandAllocator : public ExprVisitor {
  public:
-  explicit AotReturnSidVisitor(Map<Expr, Array<IntegerArray>> storage_device_map)
-      : storage_device_map_{storage_device_map}, return_sid_{-1} {}
+  // run the visitor on a function.
+  void Run(const Function& func) {
+    node_device_map_ = CollectDeviceInfo(func);
 
-  IntegerArray FindReturnSid(Function func) {
-    VisitExpr(func->body);
-    return return_sid_;
+    for (Expr param : func->params) {
+      CreateSid(param.operator->());
+    }
+
+    GetSid(func->body);
   }
 
- protected:
-  void AssignReturnSid(Expr e) {
-    auto iter = storage_device_map_.find(e);
-    if (iter != storage_device_map_.end()) {
-      return_sid_ = (*iter).second[0];
+  std::vector<int> GetReturnIds() const { return return_ids_; }
+
+  StorageMap GetStorageMap() const { return storage_device_map_; }
+
+  void VisitExpr_(const ConstantNode* op) final {
+    CreateSid(op);
+    AssignReturnSid(GetRef<Expr>(op));
+  }
+
+  void VisitExpr_(const CallNode* op) final {
+    // create token for the call node.
+    CreateSid(op);
+    for (Expr arg : op->args) {
+      GetSid(arg);
     }
+    AssignReturnSid(GetRef<Expr>(op));
   }
 
-  void VisitExpr_(const ConstantNode* cn) override {
-    ExprVisitor::VisitExpr_(cn);
-    AssignReturnSid(GetRef<Expr>(cn));
+  void VisitExpr_(const VarNode* op) final {
+    ExprVisitor::VisitExpr_(op);
+    AssignReturnSid(GetRef<Expr>(op));
   }
 
-  void VisitExpr_(const VarNode* vn) override {
-    ExprVisitor::VisitExpr_(vn);
-    AssignReturnSid(GetRef<Expr>(vn));
+  void VisitExpr_(const FunctionNode* op) final {
+    // do not recurse into sub function.
   }
 
-  void VisitExpr_(const CallNode* cn) override {
-    ExprVisitor::VisitExpr_(cn);
-    AssignReturnSid(GetRef<Expr>(cn));
+  void VisitExpr_(const GlobalVarNode* op) final {
+    // Do nothing.
   }
 
-  void VisitExpr_(const LetNode* op) override { VisitExpr(op->body); }
+  void VisitExpr_(const OpNode* op) final {
+    // Do nothing.
+  }
+
+  void VisitExpr_(const TupleNode* op) final {
+    StorageInfo field_sid;
+    Expr expr = GetRef<Expr>(op);
+    for (Expr field : op->fields) {
+      auto sid = GetSid(field);
+      field_sid.ids.insert(field_sid.ids.end(), sid.ids.begin(), sid.ids.end());
+      field_sid.dev_types.insert(field_sid.dev_types.end(), sid.dev_types.begin(),
+                                 sid.dev_types.end());
+      field_sid.sizes_bytes.insert(field_sid.sizes_bytes.end(), sid.sizes_bytes.begin(),
+                                   sid.sizes_bytes.end());
+    }
+
+    storage_device_map_[expr] = field_sid;
+    AssignReturnSid(expr);
+  }
 
-  void VisitExpr_(const TupleNode* tn) override {
-    ExprVisitor::VisitExpr_(tn);
-    AssignReturnSid(GetRef<Expr>(tn));
+  void VisitExpr_(const TupleGetItemNode* op) final {
+    Expr expr = GetRef<Expr>(op);
+    const auto& sid = GetSid(op->tuple);
+    ICHECK_LT(static_cast<size_t>(op->index), sid.ids.size());
+    storage_device_map_[expr].ids = {sid.ids[op->index]};
+    storage_device_map_[expr].sizes_bytes = {sid.sizes_bytes[op->index]};
+    storage_device_map_[expr].dev_types = {sid.dev_types[op->index]};
+    AssignReturnSid(expr);
   }
 
+  void VisitExpr_(const IfNode* op) final { LOG(FATAL) << "if is not supported."; }
+
+  void VisitExpr_(const LetNode* op) final { LOG(FATAL) << "let is not supported."; }
+
  private:
-  Map<Expr, Array<IntegerArray>> storage_device_map_;
-  IntegerArray return_sid_;
+  void AssignReturnSid(Expr e) {
+    auto iter = storage_device_map_.find(e);
+    if (iter != storage_device_map_.end()) {
+      return_ids_ = (*iter).second.ids;
+    }
+  }
+  /*!
+   * \brief ceil(size/word_size) to get number of words.
+   * \param size The original size.
+   * \param word_size The element size.
+   */
+  static size_t DivRoundUp(size_t size, size_t word_size) {
+    return (size + word_size - 1) / word_size;
+  }
+  /*!
+   * \brief Get the memory requirement.
+   * \param prototype The prototype token.
+   * \return The required memory size.
+   */
+  size_t GetMemorySize(const TensorTypeNode* ttype) {
+    ICHECK(ttype != nullptr);
+    size_t size = 1;
+    for (IndexExpr dim : ttype->shape) {
+      const int64_t* pval = tir::as_const_int(dim);
+      ICHECK(pval != nullptr) << "Cannot allocate memory symbolic tensor shape " << ttype->shape;
+      ICHECK_GE(*pval, 0) << "Cannot allocate memory for tensor with negative shape" << *pval;
+      size *= static_cast<size_t>(pval[0]);
+    }
+    size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
+    return size;
+  }
+  /*!
+   * \brief Get the necessary token.
+   * \param expr The expression.
+   * \return The corresponding token.
+   */
+  StorageInfo GetSid(const Expr& expr) {
+    this->VisitExpr(expr);
+    auto it = storage_device_map_.find(expr);
+    ICHECK(it != storage_device_map_.end());
+    return it->second;
+  }
+
+  void CreateSid(const ExprNode* op) {
+    StorageInfo sid;
+    Expr expr = GetRef<Expr>(op);
+    int device_type = node_device_map_.count(GetRef<Expr>(op)) ? node_device_map_[expr]->value : 0;
+    if (const auto* tuple_type = op->checked_type().as<TupleTypeNode>()) {
+      for (Type t : tuple_type->fields) {
+        const auto* ttype = t.as<TensorTypeNode>();
+        ICHECK(ttype);
+        sid.ids.push_back(sid_++);
+        sid.dev_types.push_back(device_type);
+        sid.sizes_bytes.push_back(GetMemorySize(ttype));
+      }
+    } else {
+      const auto* ttype = op->checked_type().as<TensorTypeNode>();
+      ICHECK(ttype);
+      sid.ids.push_back(sid_++);
+      sid.dev_types.push_back(device_type);
+      sid.sizes_bytes.push_back(GetMemorySize(ttype));
+    }
+    storage_device_map_[expr] = sid;
+  }
+  /*! \brief mapping of expression -> storageInfo*/
+  StorageMap storage_device_map_;
+  /*! \brief mapping of expression -> device type*/
+  Map<Expr, Integer> node_device_map_;
+  /*! \brief current id of the temporary allocated*/
+  int sid_{0};
+  /*! \brief the set of identifiers that are return variables */
+  std::vector<int> return_ids_;
 };
 
 /*! \brief Code generator for AOT executor */
@@ -120,14 +248,14 @@ class AOTExecutorCodegen : public ExprVisitor {
    * \brief Return a vector of variables that represents the sids for the given Relay Expr
    */
   std::vector<tir::Var> PackSid(Expr expr) {
-    Array<IntegerArray> sids = storage_device_map_[expr];
+    auto sids = storage_device_map_[expr];
     std::vector<tir::Var> sid_vars;
 
     // Note that an expression can have multiple sids associated with it
    // e.g., returning multiple values from a function
-    for (const auto& sid : sids[0]) {
+    for (const auto& sid : sids.ids) {
       // Determine if an sid is an output buffer
-      int sid_int = static_cast<int>((sid.as<IntImmNode>())->value);
+      int sid_int = sid;
       auto output_iter = std::find(return_sid_.begin(), return_sid_.end(), sid_int);
       if (output_iter != return_sid_.end()) {
         int output_index = std::distance(return_sid_.begin(), output_iter);
@@ -390,8 +518,8 @@ class AOTExecutorCodegen : public ExprVisitor {
     }
 
     ICHECK_GE(storage_device_map_.count(expr), 0);
-    auto& device_type = storage_device_map_[expr][1];
-    auto call_dev_type = device_type[0]->value;
+    auto& device_type = storage_device_map_[expr].dev_types;
+    auto call_dev_type = device_type[0];
     // Normal Relay Function
     if (targets_.size() == 1) {
       // homogeneous execution.
@@ -428,14 +556,14 @@ class AOTExecutorCodegen : public ExprVisitor {
 
     // If the Var node is an output node we need to copy the content of the variable to the output
     // It's safe to check the SID here because Var StorageToken are never reallocated
-    Array<IntegerArray> sids = storage_device_map_[expr];
+    auto sids = storage_device_map_[expr];
 
-    auto output_iter = std::find(return_sid_.begin(), return_sid_.end(),
-                                 static_cast<int>((sids[0][0].as<IntImmNode>())->value));
+    auto output_iter = std::find(return_sid_.begin(), return_sid_.end(), sids.ids[0]);
     if (output_iter != return_sid_.end()) {
       int output_index = std::distance(return_sid_.begin(), output_iter);
       auto var_expr = FindExpr(expr);
-      CopyToOutput(main_signature_[input_vars_.size() + output_index], var_expr[0], sids[2][0]);
+      CopyToOutput(main_signature_[input_vars_.size() + output_index], var_expr[0],
+                   sids.sizes_bytes[0]);
     }
   }
 
@@ -444,18 +572,18 @@ class AOTExecutorCodegen : public ExprVisitor {
     size_t index = params_.size();
     std::string name = "p" + std::to_string(index);
 
-    param_storage_ids_[name] = storage_device_map_[expr][0][0]->value;
+    param_storage_ids_[name] = storage_device_map_[expr].ids[0];
     params_[name] = op->data;
     params_by_expr_.Set(expr, name);
 
     // If the Constant node is an output node we need to copy the content of the parameter to the
     // output A Var node can only produce a single output
-    Array<IntegerArray> sids = storage_device_map_[expr];
-    auto output_iter = std::find(return_sid_.begin(), return_sid_.end(),
-                                 static_cast<int>((sids[0][0].as<IntImmNode>())->value));
+    auto sids = storage_device_map_[expr];
+    auto output_iter = std::find(return_sid_.begin(), return_sid_.end(), sids.ids[0]);
     if (output_iter != return_sid_.end()) {
       int output_index = std::distance(return_sid_.begin(), output_iter);
-      CopyToOutput(main_signature_[input_vars_.size() + output_index], PackParam(expr), sids[2][0]);
+      CopyToOutput(main_signature_[input_vars_.size() + output_index], PackParam(expr),
+                   sids.sizes_bytes[0]);
     }
   }
 
@@ -511,9 +639,9 @@ class AOTExecutorCodegen : public ExprVisitor {
         continue;
       }
 
-      for (unsigned int i = 0; i < kv.second[0].size(); i++) {
-        int size = kv.second[2][i];
-        int sid = static_cast<int>((kv.second[0][i].as<IntImmNode>())->value);
+      for (unsigned int i = 0; i < kv.second.ids.size(); i++) {
+        int size = kv.second.sizes_bytes[i];
+        int sid = kv.second.ids[i];
 
         if (std::find(return_sid_.begin(), return_sid_.end(), sid) != return_sid_.end()) {
           continue;
@@ -523,6 +651,8 @@ class AOTExecutorCodegen : public ExprVisitor {
         // so we don't pay the price of allocation for every inference
         if (!allocated[sid]) {
           body = tir::Allocate(sids_table_[sid], DataType::Int(8), {size}, tir::const_true(), body);
+          body = tir::AttrStmt(sids_table_[sid], tir::attr::storage_scope, tir::StringImm("global"),
+                               body);
         }
         allocated[sid] = true;
       }
@@ -566,7 +696,8 @@ class AOTExecutorCodegen : public ExprVisitor {
   std::unordered_map<std::string, int64_t> param_storage_ids_;
 
   /*! \brief plan memory of device result */
-  Map<Expr, Array<IntegerArray>> storage_device_map_;
+  StorageMap storage_device_map_;
+  /*! \brief mapping sid -> tir::Var */
   std::unordered_map<int, te::Var> sids_table_;
   /*! \brief lowered funcs */
   std::unordered_map<std::string, IRModule> lowered_funcs_;
@@ -577,7 +708,7 @@ class AOTExecutorCodegen : public ExprVisitor {
   /*! \brief the set of statements that make the program */
   std::vector<tir::Stmt> stmts_;
   /*! \brief the list of return sids (note that the function might return more then one output */
-  IntegerArray return_sid_;
+  std::vector<int> return_sid_;
 
  public:
  AOTExecutorCodegen(runtime::Module* mod, const TargetsMap& targets, Target target_host)
@@ -588,9 +719,11 @@ class AOTExecutorCodegen : public ExprVisitor {
   }
 
   LoweredOutput Codegen(relay::Function func) {
-    // Get the module, storage map and token sizes
-    auto pf = GetPackedFunc("relay.backend.GraphPlanMemory");
-    storage_device_map_ = (*pf)(func);
+    auto aot_allocator = AOTOnDemandAllocator();
+    aot_allocator.Run(func);
+
+    // Retrieve the storage map
+    storage_device_map_ = aot_allocator.GetStorageMap();
 
     int input_index = 0;
     for (auto input : func->params) {
@@ -600,14 +733,14 @@ class AOTExecutorCodegen : public ExprVisitor {
 
     // Define the storage allocator ids
     for (auto kv : storage_device_map_) {
-      for (const auto& sid : kv.second[0]) {
+      for (const auto& sid : kv.second.ids) {
        te::Var sid_var(MakeString("sid_", sid), PointerType(PrimType(DataType::Int(8))));
         sids_table_[sid] = sid_var;
       }
     }
 
-    // Find the return sid
-    return_sid_ = AotReturnSidVisitor(storage_device_map_).FindReturnSid(func);
+    // Retrieve the return sids
+    return_sid_ = aot_allocator.GetReturnIds();
     for (unsigned int output_index = 0; output_index < return_sid_.size(); output_index++) {
       main_signature_.push_back(tir::Var(MakeString("output_", output_index), DataType::Handle()));
     }
@@ -635,14 +768,21 @@ class AOTExecutorCodegen : public ExprVisitor {
     }
     ret.external_mods = compile_engine_->LowerExternalFunctions();
 
+    // Build the TIR IRModule
+    Map<GlobalVar, BaseFunc> symbol_map;
+    symbol_map.Set(GlobalVar(::tvm::runtime::symbol::tvm_run_func_prefix), prim_func);
+    IRModule mod_run(symbol_map);
+
+    // Apply storage rewrite pass to the runner function to do memory planning
+    auto storage_rewrite = tir::transform::StorageRewrite();
+    mod_run = storage_rewrite(mod_run);
+
+    // Update the lowered functions
     auto target_host_str = target_host_->str();
     if (ret.lowered_funcs.find(target_host_str) != ret.lowered_funcs.end()) {
-      ret.lowered_funcs[target_host_str]->Add(
-          GlobalVar(::tvm::runtime::symbol::tvm_run_func_prefix), prim_func);
+      ret.lowered_funcs[target_host_str]->Update(mod_run);
     } else {
-      Map<GlobalVar, BaseFunc> symbol_map;
-      symbol_map.Set(GlobalVar(::tvm::runtime::symbol::tvm_run_func_prefix), prim_func);
-      ret.lowered_funcs.Set(target_host_str, IRModule(symbol_map));
+      ret.lowered_funcs.Set(target_host_str, mod_run);
     }
     ret.function_metadata = std::move(function_metadata_);
     ret.metadata =
diff --git a/tests/python/relay/aot/aot_test_utils.py b/tests/python/relay/aot/aot_test_utils.py
index 8c7aefe70d091..e7358828c8f81 100644
--- a/tests/python/relay/aot/aot_test_utils.py
+++ b/tests/python/relay/aot/aot_test_utils.py
@@ -36,6 +36,46 @@
 from tvm.micro import export_model_library_format
 
 
+def convert_to_relay(
+    tflite_model_buf,
+    input_data,
+    input_node,
+):
+    """ Convert a tflite model buffer into a Relay module """
+
+    def convert_to_list(x):
+        if not isinstance(x, list):
+            x = [x]
+        return x
+
+    # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
+    try:
+        import tflite.Model
+
+        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)
+    except AttributeError:
+        import tflite
+
+        tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0)
+    except ImportError:
+        raise ImportError("The tflite package must be installed")
+
+    input_data = convert_to_list(input_data)
+    input_node = convert_to_list(input_node)
+
+    shape_dict = {}
+    dtype_dict = {}
+    for i, e in enumerate(input_node):
+        shape_dict[e] = input_data[i].shape
+        dtype_dict[e] = input_data[i].dtype.name
+
+    mod, params = relay.frontend.from_tflite(
+        tflite_model, shape_dict=shape_dict, dtype_dict=dtype_dict
+    )
+    mod["main"] = relay.build_module.bind_params_by_name(mod["main"], params)
+    return mod, params
+
+
 def subprocess_with_stdout_and_log(cmd, cwd, logfile, stdout):
     """
     This method runs a process and logs the output to both a log file and stdout
diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py
index 02b4de3a64f34..3989570da936b 100644
--- a/tests/python/relay/aot/test_crt_aot.py
+++ b/tests/python/relay/aot/test_crt_aot.py
@@ -364,5 +364,26 @@ def test_byoc_utvm(use_calculated_workspaces):
     compile_and_run(mod, input_list, output_list, use_calculated_workspaces)
 
 
+def test_quant_mobilenet_tfl():
+    pytest.importorskip("tflite")
+
+    import tvm.relay.testing.tf as tf_testing
+    tflite_model_file = tf_testing.get_workload_official(
+        "https://storage.googleapis.com/download.tensorflow.org/"
+        "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
+        "mobilenet_v1_1.0_224_quant.tflite",
+    )
+    with open(tflite_model_file, "rb") as f:
+        tflite_model_buf = f.read()
+    data_shape = (1, 224, 224, 3)
+    in_min, in_max = (0, 255)
+    data = np.random.randint(in_min, high=in_max, size=data_shape, dtype="uint8")
+    mod, params = convert_to_relay(tflite_model_buf, data, "input")
+    inputs = {"input": data}
+    output_list = generate_ref_data(mod, inputs, params)
+    input_list = [inputs["input"]]
+    compile_and_run(mod, input_list, output_list, True, params)
+
+
 if __name__ == "__main__":
     pytest.main([__file__])
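As a rough cross-check of what the new allocator records, the value stored in StorageInfo::sizes_bytes for each tensor is the product of its static dimensions times the element size rounded up to whole bytes (GetMemorySize with DivRoundUp). The short Python sketch below reproduces that arithmetic outside of TVM; the helper name and the example shape are illustrative only and are not part of the patch.

import math

def storage_size_bytes(shape, dtype_bits, lanes=1):
    # Mirrors the allocator's GetMemorySize: product of the static dimensions
    # times DivRoundUp(bits * lanes, 8) bytes per element.
    size = 1
    for dim in shape:
        assert isinstance(dim, int) and dim >= 0, "only static, non-negative dims can be planned"
        size *= dim
    return size * math.ceil(dtype_bits * lanes / 8)

# For the uint8 input used by test_quant_mobilenet_tfl:
# (1, 224, 224, 3) * 1 byte per element = 150528 bytes.
print(storage_size_bytes((1, 224, 224, 3), dtype_bits=8))

The round-up matters for sub-byte dtypes such as int4, where bits * lanes is not a multiple of eight and a partial byte still occupies a full byte of storage.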