From c04a051aa17885cc82ead08baf27131e7301c307 Mon Sep 17 00:00:00 2001 From: Matthew Brookhart Date: Thu, 9 Sep 2021 09:50:41 -0600 Subject: [PATCH] [TIR][VM] Revert a change to lower_tvm_builtin.cc from #6126 (#8274) * revert a change to lower_tvm_builtin.cc from #6126 * disable sim target on VTA tutorial fix bad refactor try again --- src/tir/transforms/lower_tvm_builtin.cc | 10 ++++++++++ vta/tutorials/frontend/deploy_classification.py | 6 +++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/tir/transforms/lower_tvm_builtin.cc b/src/tir/transforms/lower_tvm_builtin.cc index f5a553aa0598..99d71ebe15bd 100644 --- a/src/tir/transforms/lower_tvm_builtin.cc +++ b/src/tir/transforms/lower_tvm_builtin.cc @@ -113,6 +113,16 @@ class BuiltinLower : public StmtExprMutator { op = stmt.as<AllocateNode>(); // Get constant allocation bound. int64_t nbytes = GetVectorBytes(op->dtype); + if (device_type_.defined()) { + if (const auto* dev_type = device_type_.as<IntImmNode>()) { + if (dev_type->value == kDLCPU) { + int32_t constant_size = op->constant_allocation_size(); + if (constant_size > 0 && constant_size * nbytes < runtime::kMaxStackAlloca) { + return stmt; + } + } + } + } PrimExpr total_bytes = make_const(op->extents[0].dtype(), nbytes); for (size_t i = 0; i < op->extents.size(); ++i) { total_bytes = total_bytes * op->extents[i]; diff --git a/vta/tutorials/frontend/deploy_classification.py b/vta/tutorials/frontend/deploy_classification.py index 139e30333f1e..572aaee7c3b4 100644 --- a/vta/tutorials/frontend/deploy_classification.py +++ b/vta/tutorials/frontend/deploy_classification.py @@ -191,7 +191,7 @@ env.WGT_WIDTH, start_name=pack_dict[model][0], stop_name=pack_dict[model][1], - device_annot=(env.TARGET == "intelfocl" or env.TARGET == "sim"), + device_annot=(env.TARGET == "intelfocl"), ) else: relay_prog = mod["main"] @@ -203,7 +203,7 @@ relay_prog, target=target, params=params, target_host=env.target_host ) else: - if env.TARGET == "intelfocl" or env.TARGET == "sim": + if 
env.TARGET == "intelfocl": # multiple targets to run both on cpu and vta target = {"cpu": env.target_vta_cpu, "ext_dev": target} with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}): @@ -221,7 +221,7 @@ remote.upload(temp.relpath("graphlib.tar")) lib = remote.load_module("graphlib.tar") - if env.TARGET == "intelfocl" or env.TARGET == "sim": + if env.TARGET == "intelfocl": ctxes = [remote.ext_dev(0), remote.cpu(0)] m = graph_executor.create(graph, lib, ctxes) else: