diff --git a/python/tvm/relay/backend/te_compiler.py b/python/tvm/relay/backend/te_compiler.py
index 0dc48b695acf..6c3930505a5c 100644
--- a/python/tvm/relay/backend/te_compiler.py
+++ b/python/tvm/relay/backend/te_compiler.py
@@ -415,7 +415,7 @@ def get():
 
 
 def lower_to_primfunc(relay_func, target):
-    """Lowers Relay Function to TIR PrimFunc.
+    """Lower Relay Function to TIR PrimFunc.
 
     Parameters
     ----------
@@ -423,7 +423,7 @@ def lower_to_primfunc(relay_func, target):
         The source primitive function, created by FuseOps.
 
     target : Target
-        The target we want to create schedule for.
+        The target we want to create a schedule for.
 
     Returns
     -------
diff --git a/src/relay/backend/te_compiler_cache.cc b/src/relay/backend/te_compiler_cache.cc
index 877976065ea2..2480594d5ece 100644
--- a/src/relay/backend/te_compiler_cache.cc
+++ b/src/relay/backend/te_compiler_cache.cc
@@ -1099,6 +1099,7 @@ std::pair<Optional<tir::PrimFunc>, std::string> LowerToPrimFunc(const Function&
 
 tir::PrimFunc LowerToPrimFunc(const Function& relay_func, Target target) {
   auto [f_opt, _] = LowerToPrimFunc(relay_func, target, NameSupply(""));
+  (void)_;  // to suppress -Werror=unused-variable warning
   if (f_opt) {
     return f_opt.value();
   }
diff --git a/src/relay/backend/te_compiler_cache.h b/src/relay/backend/te_compiler_cache.h
index 6192dd21fb54..0e4a77c16354 100644
--- a/src/relay/backend/te_compiler_cache.h
+++ b/src/relay/backend/te_compiler_cache.h
@@ -212,9 +212,9 @@ class CCacheValue : public ObjectRef {
 Array<IndexExpr> GetShape(const Array<IndexExpr>& shape);
 
 /*!
- * \brief Lowers Relay primitive Function to TE Compute
+ * \brief Lower Relay primitive Function to TE Compute
  * \param source_func The primitive function to be lowered.
- * \param target The target we want to create schedule for.
+ * \param target The target we want to create a schedule for.
  * \param constant_name_supply A name supplier for constants
  * across different invocations of this function.
  * \param return_inputs If true, prepend input tensors to the output array of tensors.
@@ -225,9 +225,9 @@ std::tuple<Array<te::Tensor>, Array<runtime::NDArray>, std::string> LowerTECompu
     bool return_inputs = true);
 
 /*!
- * \brief Lowers Relay Function to TIR PrimFunc, by composing LowerTECompute and CreatePrimFunc.
+ * \brief Lower Relay Function to TIR PrimFunc, by composing LowerTECompute and CreatePrimFunc.
  * \param relay_func The primitive function to be lowered.
- * \param target The target we want to create schedule for.
+ * \param target The target we want to create a schedule for.
  * \param constant_name_supply A name supplier for constants
  * across different invocations of this function.
  * \return A pair of the created prim func and the name of the fused function.
@@ -239,7 +239,7 @@ std::pair<Optional<tir::PrimFunc>, std::string> LowerToPrimFunc(const Function&
 /*!
  * \brief Create schedule for target.
  * \param source_func The primitive function to be lowered.
- * \param target The target we want to create schedule for.
+ * \param target The target we want to create a schedule for.
  * \param global_var_supply A name supplier for global variables.
  * \param constant_name_supply A name supplier for constants.
  * \return Pair of schedule and cache.
diff --git a/tests/python/unittest/test_tir_transform_plan_update_buffer_allocation_location.py b/tests/python/unittest/test_tir_transform_plan_update_buffer_allocation_location.py
index 92e3cbd66e2f..0a8a0dd59fbf 100644
--- a/tests/python/unittest/test_tir_transform_plan_update_buffer_allocation_location.py
+++ b/tests/python/unittest/test_tir_transform_plan_update_buffer_allocation_location.py
@@ -14,10 +14,15 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import numpy as np
+
 import tvm
 import tvm.testing
 from tvm import te
 from tvm.script import tir as T
+from tvm import relay, tir
+from tvm.relay.backend.te_compiler import lower_to_primfunc
+from tvm.tir.tensor_intrin.hexagon import VRMPY_u8u8i32_INTRIN
 
 
 def _check(original, transformed):
@@ -360,5 +365,56 @@ def after(A: T.Buffer[(4, 16), "int32"], C: T.Buffer[(4, 8), "int32"]):
     _check(before, after)
 
 
+def test_allocate_const_after_tensorize():
+    i_size, o_size, h_size, w_size = 64, 64, 56, 56
+    k_height_size = k_width_size = 3
+    w_shape = (o_size, i_size, k_height_size, k_width_size)
+
+    data = relay.var("data", shape=(1, i_size, h_size, w_size), dtype="uint8")
+    weight = relay.var("weight", shape=w_shape, dtype="uint8")
+    conv2d = relay.nn.conv2d(
+        data=data,
+        weight=weight,
+        kernel_size=(k_height_size, k_width_size),
+        channels=o_size,
+        padding=(0, 0),
+        strides=(1, 1),
+        out_dtype="int32",
+    )
+    mod = tvm.IRModule.from_expr(conv2d)
+
+    executor = relay.backend.Executor("graph", {"link-params": True})
+    mod = mod.with_attr("executor", executor)
+
+    weight_np = np.random.uniform(1, 10, size=w_shape).astype("uint8")
+
+    target = tvm.target.Target("hexagon")
+
+    with tvm.transform.PassContext(opt_level=3):
+        opt_mod, _ = relay.optimize(mod, params={"weight": weight_np}, target=target)
+
+    conv2d_func = opt_mod["main"].body.args[0].op
+    prim_func = lower_to_primfunc(conv2d_func, target)
+
+    sch = tir.Schedule(prim_func)
+    block = sch.get_block("conv2d_NCHWc_int8")
+    loops = sch.get_loops(block)
+
+    sch.reorder(loops[8], loops[4], loops[-1])
+    sch.decompose_reduction(block, loops[1])
+    sch.tensorize(loops[4], VRMPY_u8u8i32_INTRIN)
+
+    seq = tvm.transform.Sequential(
+        [
+            tvm.tir.transform.LowerInitBlock(),
+            tvm.tir.transform.PlanAndUpdateBufferAllocationLocation(),
+        ]
+    )
+
+    # The following error is emitted if AllocateConst nodes are not correctly handled:
+    # Check failed: (buffer_data_to_buffer_.count(source_var)) is false:
+    _ = seq(sch.mod)
+
+
 if __name__ == "__main__":
     tvm.testing.main()
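For context, the Python helper whose docstring is touched in the first file can also be exercised outside the Hexagon-specific test above. Below is a minimal sketch, not part of the patch, assuming a toy add+relu workload, an llvm target, and fusion via FuseOps (all chosen here purely for illustration), of lowering a fused Relay primitive function to a TIR PrimFunc with lower_to_primfunc:

    # Illustrative usage sketch (not part of the patch): lower a fused Relay
    # function to a TIR PrimFunc. Workload and target are arbitrary examples.
    import tvm
    from tvm import relay
    from tvm.relay.backend.te_compiler import lower_to_primfunc

    # Build a tiny Relay module and fuse it, so that the body of "main" becomes
    # a call to a single primitive function (the input lower_to_primfunc expects).
    x = relay.var("x", shape=(1, 16), dtype="float32")
    y = relay.nn.relu(x + relay.const(1.0))
    mod = tvm.IRModule.from_expr(y)
    mod = relay.transform.InferType()(mod)
    mod = relay.transform.FuseOps(fuse_opt_level=3)(mod)

    # The fused primitive function is the operator of the call in "main".
    fused_func = mod["main"].body.op
    prim_func = lower_to_primfunc(fused_func, tvm.target.Target("llvm"))
    print(prim_func.script())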