Skip to content

Commit

Permalink
[uTVM] Initial BYOC support with c-source module
Browse files Browse the repository at this point in the history
This commit mainly introduces a BYOC C-source module
example to uTVM. It also modifies the example codegen_c
external module generator so that it generates
uTVM-friendly C source.

Change-Id: I09f3a42017d518dd5b6c89e3fe0a0332b80088b0
  • Loading branch information
manupak committed Jan 12, 2021
1 parent b84eb16 commit 16a51f9
Show file tree
Hide file tree
Showing 3 changed files with 229 additions and 27 deletions.
25 changes: 15 additions & 10 deletions src/relay/backend/contrib/codegen_c/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,7 @@ class CodegenC : public MemoizedExprTranslator<std::vector<Output>>, public Code
for (size_t i = 0; i < out_shape.size(); ++i) {
out_size *= out_shape[i];
}
buf_stream << dtype << "* " << out << " = (" << dtype << "*)std::malloc(4 * " << out_size
<< ");";
buf_stream << dtype << "* " << out << " = (" << dtype << "*)malloc(4 * " << out_size << ");";
buf_decl_.push_back(buf_stream.str());

decl_stream << ", " << out << ");";
Expand Down Expand Up @@ -229,25 +228,31 @@ class CSourceCodegen : public CSourceModuleCodegenBase {
String func_name = std::get<1>(res);

// Create headers
code_stream_ << "#include <cstring>\n";
code_stream_ << "#include <vector>\n";
code_stream_ << "#include <stdio.h>\n";
code_stream_ << "#include <stdlib.h>\n";
code_stream_ << "#include <string.h>\n";
code_stream_ << "#include <tvm/runtime/c_runtime_api.h>\n";
code_stream_ << "#include <tvm/runtime/container.h>\n";
code_stream_ << "#include <tvm/runtime/packed_func.h>\n";
code_stream_ << "#include <dlpack/dlpack.h>\n";
code_stream_ << "using namespace tvm::runtime;\n";
code_stream_ << "#include <tvm/runtime/c_backend_api.h>\n";
code_stream_ << "#include <tvm/runtime/crt/module.h>\n";
if (!variables.empty()) {
// These are only needed to handle metadata copying
code_stream_ << "#include <tvm/runtime/container.h>\n";
code_stream_ << "#include <tvm/runtime/packed_func.h>\n";
code_stream_ << "#include <dlpack/dlpack.h>\n";
code_stream_ << "using namespace tvm::runtime;\n";
}

// Append some common macro for operator definition.
const char* operator_macro = R"op_macro(
#define CSOURCE_BINARY_OP_1D(p_ID_, p_OP_, p_DIM1_, p_DTYPE) \
extern "C" void p_ID_(p_DTYPE* a, p_DTYPE* b, p_DTYPE* out) { \
void p_ID_(p_DTYPE* a, p_DTYPE* b, p_DTYPE* out) { \
for (int64_t i = 0; i < p_DIM1_; ++i) { \
out[i] = a[i] p_OP_ b[i]; \
} \
}
#define CSOURCE_BINARY_OP_2D(p_ID_, p_OP_, p_DIM1_, p_DIM2_, p_DTYPE) \
extern "C" void p_ID_(p_DTYPE* a, p_DTYPE* b, p_DTYPE* out) { \
void p_ID_(p_DTYPE* a, p_DTYPE* b, p_DTYPE* out) { \
for (int64_t i = 0; i < p_DIM1_; ++i) { \
for (int64_t j = 0; j < p_DIM2_; ++j) { \
int64_t k = i * p_DIM2_ + j; \
Expand Down
91 changes: 74 additions & 17 deletions src/relay/backend/contrib/codegen_c/codegen_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,40 @@ class CodegenCBase {
indent_ -= 2;
}

/*!
 * \brief Emit the opening of the C-linkage runtime entry point for an external function.
 *
 * Writes to code_stream_, in order:
 *  - an `extern "C" {` opener guarded by `#ifdef __cplusplus` / `#endif`, and
 *  - the signature
 *    `TVM_DLL int32_t <func_name>(TVMValue* args, int* type_code, int num_args,
 *    TVMValue* out_value, int* out_type_code) {`.
 *
 * Only the opening is emitted: the function body, its closing brace and the guarded
 * closing brace of the `extern "C"` block must be written by the caller.
 *
 * \param func_name Name given to the generated function. Taken by const reference because
 *        it is only streamed out, never modified or stored.
 */
void PrintRuntimeFunctionHeader(const std::string& func_name) {
  // Open a C-linkage region that is only visible to C++ compilers.
  code_stream_ << "#ifdef __cplusplus\n"
               << "extern \"C\" {\n"
               << "#endif\n";
  // Return type and symbol name.
  code_stream_ << "TVM_DLL int32_t " << func_name << "(";
  // Parameter list, ending with the opening brace of the function body.
  code_stream_ << "TVMValue* args, "
               << "int* type_code, "
               << "int num_args, "
               << "TVMValue* out_value, "
               << "int* out_type_code) {\n";
}

/*!
 * \brief Emit a declaration that binds one packed argument to a DLTensor pointer.
 *
 * After calling PrintIndents(), writes a line of the form
 *   `DLTensor* arg<idx> = (DLTensor*)(((TVMValue*)args)[<idx>].v_handle);`
 * to code_stream_.
 *
 * \param idx Index used both in the generated variable name and as the position
 *        read from the generated function's `args` array.
 */
void PrintArgToData(int idx) {
  PrintIndents();
  code_stream_ << "DLTensor* arg" << idx << " = "
               << "(DLTensor*)(((TVMValue*)args)[" << idx << "].v_handle);\n";
}

/*!
 * \brief Emit a declaration that binds one packed output slot to a DLTensor pointer.
 *
 * After calling PrintIndents(), writes a line of the form
 *   `DLTensor* ret<idx> = (DLTensor*)(((TVMValue*)args)[<idx>].v_handle);`
 * to code_stream_. The output tensor is read from the same `args` array as the inputs,
 * so idx is the slot's position within that array.
 *
 * \param idx Index used both in the generated variable name and as the position
 *        read from the generated function's `args` array.
 */
void PrintRetToData(int idx) {
  PrintIndents();
  code_stream_ << "DLTensor* ret" << idx << " = "
               << "(DLTensor*)(((TVMValue*)args)[" << idx << "].v_handle);\n";
}

/*!
* \brief Generate C code for the external function.
*
Expand All @@ -100,12 +134,12 @@ class CodegenCBase {
* Array<NDArray> foo_consts;
*
* // An example code for the generated C function.
* extern "C" int foo_wrapper_(DLTensor* arg0,
* int foo_wrapper_(DLTensor* arg0,
* DLTensor* arg1,
* DLTensor* out) {
* foo_(static_cast<float*>(arg0->data),
* static_cast<float*>(arg1->data),
* static_cast<float*>(out->data));
* foo_((float*)(arg0->data),
* (float*)(arg1->data),
* (float*)(out->data));
* return 0;
* }
*
Expand All @@ -124,7 +158,8 @@ class CodegenCBase {
const std::string& const_arr_name, const std::vector<Output>& outs) {
// Print signature
code_stream_ << "\n";
code_stream_ << "extern \"C\" int " << func_name << "_wrapper_(";

code_stream_ << "int " << func_name << "_wrapper_(";
for (size_t i = 0; i < args.size(); i++) {
code_stream_ << "DLTensor* arg" << i << ",\n";
code_stream_ << "\t";
Expand All @@ -142,25 +177,47 @@ class CodegenCBase {
code_stream_ << func_name << "_(";
for (size_t i = 0; i < args.size(); i++) {
const auto& dtype_str = GetDtypeString(args[i]);
code_stream_ << "static_cast<" << dtype_str << "*>(arg" << i << "->data),\n";
code_stream_ << "(" << dtype_str << "*)(arg" << i << "->data),\n";
PrintIndents();
}
for (size_t i = 0; i < outs.size() - 1; i++) {
code_stream_ << "static_cast<" << outs[i].dtype << "*>(out" << i << "->data),\n";
code_stream_ << "(" << outs[i].dtype << "*)(out" << i << "->data),\n";
PrintIndents();
}
code_stream_ << "static_cast<" << outs.back().dtype << "*>(out" << outs.size() - 1
<< "->data));\n";
code_stream_ << "(" << outs.back().dtype << "*)(out" << outs.size() - 1 << "->data));\n";
PrintIndents();
code_stream_ << "return 0;\n";
ExitScope();
code_stream_ << "}\n\n";

// Generate the macro
code_stream_ << "TVM_DLL_EXPORT_TYPED_FUNC(" << func_name << ", " << func_name
<< "_wrapper_);\n\n";
// Create the external function
PrintRuntimeFunctionHeader(func_name);
EnterScope();
for (size_t i = 0; i < args.size(); i++) {
PrintArgToData(i);
}
for (size_t i = 0; i < outs.size(); i++) {
PrintRetToData(args.size() + i);
}
PrintIndents();
code_stream_ << func_name << "_wrapper_(";
for (size_t i = 0; i < args.size(); i++) {
code_stream_ << "arg" << i << ",";
}
for (size_t i = 0; i < outs.size() - 1; i++) {
code_stream_ << "ret" << args.size() + i << ",";
}
code_stream_ << "ret" << args.size() + outs.size() - 1 << ");\n";
PrintIndents();
code_stream_ << "return 0;\n";
ExitScope();
code_stream_ << "}\n";
code_stream_ << "#ifdef __cplusplus\n";
code_stream_ << "}\n";
code_stream_ << "#endif\n";

if (!const_arr_name.empty()) {
// If there are constants, insert the __init_ and the wrapper
code_stream_ << "int " << func_name << "_init_wrapper_(Array<NDArray> arr) {\n";
EnterScope();
PrintIndents();
Expand Down Expand Up @@ -205,8 +262,8 @@ class CodegenCBase {
code_stream_ << const_arr_name << "\n\n";
}
// Create the signature. For example, it could be:
// extern "C" void dnnl_0_(float* in0, float* in1, float* out0, float* out1) {}
code_stream_ << "extern \"C\" void " << ext_func_id << "_(";
// void dnnl_0_(float* in0, float* in1, float* out0, float* out1) {}
code_stream_ << "void " << ext_func_id << "_(";

for (const auto& arg : args) {
const auto& dtype_str = GetDtypeString(arg);
Expand Down Expand Up @@ -235,14 +292,14 @@ class CodegenCBase {
continue;
}
this->PrintIndents();
code_stream_ << "std::memcpy(out" << i << ", " << outs[i].name << ", 4 * " << outs[i].size
code_stream_ << "memcpy(out" << i << ", " << outs[i].name << ", 4 * " << outs[i].size
<< ");\n";
}

// Free buffers
for (size_t i = 0; i < buf_decl.size(); i++) {
this->PrintIndents();
code_stream_ << "std::free(buf_" << i << ");\n";
code_stream_ << "free(buf_" << i << ");\n";
}

this->ExitScope();
Expand Down Expand Up @@ -322,7 +379,7 @@ class CodegenCBase {
* \return The created reference
*/
std::string CreateDataReference(const std::string& symbol, int const_id) const {
return "static_cast<float*>(" + symbol + "_consts[" + std::to_string(const_id) + "]->data)";
return "(float*)(" + symbol + "_consts[" + std::to_string(const_id) + "]->data)";
}

/*!
Expand Down
140 changes: 140 additions & 0 deletions tests/micro/qemu/test_zephyr.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@

from tvm.micro.contrib import zephyr
from tvm.contrib import utils
from tvm.relay.expr_functor import ExprMutator
from tvm.relay.op.annotation import compiler_begin, compiler_end

BUILD = True
DEBUG = False
Expand Down Expand Up @@ -198,5 +200,143 @@ def test_relay(platform):
tvm.testing.assert_allclose(result, x_in * x_in + 1)


class CcompilerAnnotator(ExprMutator):
    """
    This is used to create external functions for ccompiler.
    A simple annotator that creates the following program:
           |
      -- begin --
           |
          add
           |
        subtract
           |
        multiply
           |
       -- end --
           |

    The annotator is driven by ``self.in_compiler``:
      * 0 -- not inside a region (initial value, and restored after each multiply),
      * 1 -- a multiply has been entered and its arguments are being visited,
      * 2 -- an add was annotated while in state 1.
    """

    def __init__(self):
        super(CcompilerAnnotator, self).__init__()
        # Region-tracking state; see the class docstring for the meaning of 0/1/2.
        self.in_compiler = 0

    def visit_call(self, call):
        """Rewrite add/subtract/multiply calls with "ccompiler" begin/end annotations.

        Calls that are not handled by one of the branches below (including add and
        subtract seen while ``in_compiler`` is not 1) fall through to the default
        ``ExprMutator.visit_call``.
        """
        if call.op.name == "add":  # Annotate begin at args
            if self.in_compiler == 1:
                # Both operands of the add are wrapped in compiler_begin.
                lhs = compiler_begin(super().visit(call.args[0]), "ccompiler")
                rhs = compiler_begin(super().visit(call.args[1]), "ccompiler")
                op = relay.add(lhs, rhs)
                # Record that an add was annotated so the enclosing multiply
                # knows to emit compiler_end.
                self.in_compiler = 2
                return op
        elif call.op.name == "subtract":
            if self.in_compiler == 1:
                lhs = super().visit(call.args[0])
                rhs = super().visit(call.args[1])
                # Only operands that are plain variables get a compiler_begin here.
                if isinstance(lhs, relay.expr.Var):
                    lhs = compiler_begin(lhs, "ccompiler")
                if isinstance(rhs, relay.expr.Var):
                    rhs = compiler_begin(rhs, "ccompiler")
                return relay.subtract(lhs, rhs)
        elif call.op.name == "multiply":  # Annotate end at output
            # Must be set before visiting the arguments: the add/subtract branches
            # above only annotate while in_compiler == 1.
            self.in_compiler = 1
            lhs = super().visit(call.args[0])
            rhs = super().visit(call.args[1])
            if isinstance(lhs, relay.expr.Var):
                lhs = compiler_begin(lhs, "ccompiler")
            if isinstance(rhs, relay.expr.Var):
                rhs = compiler_begin(rhs, "ccompiler")
            op = relay.multiply(lhs, rhs)
            # State 2 means an add was annotated while visiting the arguments,
            # so the multiply output is wrapped in compiler_end.
            if self.in_compiler == 2:
                op = compiler_end(op, "ccompiler")
            # Reset so calls visited afterwards are not annotated.
            self.in_compiler = 0
            return op
        return super().visit_call(call)


def check_result(relay_mod, model, zephyr_board, map_inputs, out_shape, result):
    """Build a Relay module for a micro target, run it and compare its outputs.

    Parameters
    ----------
    relay_mod : tvm.IRModule
        Module passed to ``tvm.relay.build``.
    model : str
        Model name passed to ``tvm.target.target.micro`` to create the build target.
    zephyr_board : str
        Board identifier forwarded to ``_make_session``.
    map_inputs : dict
        Maps input names to the data set on the graph runtime.
    out_shape : tuple or list of tuple
        Shape of the output, or a list with one shape per output.
    result : numpy.ndarray or list of numpy.ndarray
        Expected output, or a list with one expected value per output.

    Raises
    ------
    AssertionError
        Raised by ``tvm.testing.assert_allclose`` when an output differs from the
        expected value by more than the 1e-5 relative/absolute tolerance.
    """
    tol = 1e-5
    target = tvm.target.target.micro(model)
    with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
        graph, mod, params = tvm.relay.build(relay_mod, target=target)

    with _make_session(model, target, zephyr_board, mod) as session:
        rt_mod = tvm.micro.create_local_graph_runtime(
            graph, session.get_system_lib(), session.context
        )
        for name, data in map_inputs.items():
            rt_mod.set_input(name, data)
        # Bind the parameters once, after the inputs. The original bound them both
        # before and after the loop; only the last call decides the final values, so
        # the earlier one was redundant work.
        rt_mod.set_input(**params)
        rt_mod.run()

        # Accept either a single shape/result or a list of them.
        out_shapes = out_shape if isinstance(out_shape, list) else [out_shape]
        results = result if isinstance(result, list) else [result]

        for idx, shape in enumerate(out_shapes):
            out = tvm.nd.empty(shape, ctx=session.context)
            out = rt_mod.get_output(idx, out)
            tvm.testing.assert_allclose(out.asnumpy(), results[idx], rtol=tol, atol=tol)


def test_byoc_utvm(platform):
    """Check that a graph partly offloaded to the "ccompiler" BYOC target runs on uTVM.

    Two add -> subtract -> multiply chains are built to be annotated for "ccompiler";
    a third add -> subtract chain has no multiply and is left to TVM. The three
    results are concatenated along axis 0 and compared against a numpy reference.
    """
    model, zephyr_board = PLATFORMS[platform]

    tensor_shape = (10, 10)
    x = relay.var("x", shape=tensor_shape)
    weights = [relay.var("w{}".format(i), shape=tensor_shape) for i in range(8)]

    def add_sub_mul(w_add, w_sub, w_mul):
        """Build ((x + w_add) - w_sub) * w_mul."""
        summed = relay.add(x, w_add)
        diff = relay.subtract(summed, w_sub)
        return relay.multiply(diff, w_mul)

    # C compiler
    q0 = add_sub_mul(weights[0], weights[1], weights[2])
    q1 = add_sub_mul(weights[3], weights[4], weights[5])

    # Other parts on TVM
    q2 = relay.subtract(relay.add(x, weights[6]), weights[7])

    joined = relay.concatenate((q0, q1, q2), axis=0)
    func = relay.Function([x] + weights, joined)

    mod = tvm.IRModule()
    mod["main"] = CcompilerAnnotator().visit(func)
    mod = tvm.relay.transform.PartitionGraph()(mod)
    mod = tvm.relay.transform.InferType()(mod)

    def random_tensor():
        """Return a fresh random float32 tensor of the test shape."""
        return np.random.rand(10, 10).astype("float32")

    x_data = random_tensor()
    w_data = [random_tensor() for _ in range(8)]

    map_inputs = {"w{}".format(i): data for i, data in enumerate(w_data)}
    map_inputs["x"] = x_data

    expected = np.concatenate(
        (
            ((x_data + w_data[0]) - w_data[1]) * w_data[2],
            ((x_data + w_data[3]) - w_data[4]) * w_data[5],
            x_data + w_data[6] - w_data[7],
        ),
        axis=0,
    )
    check_result(
        relay_mod=mod,
        map_inputs=map_inputs,
        out_shape=(30, 10),
        result=expected,
        model=model,
        zephyr_board=zephyr_board,
    )


if __name__ == "__main__":
    # Run pytest on this file's directory, forwarding any extra command-line
    # arguments, and use pytest's return value as the process exit status.
    test_dir = os.path.dirname(__file__)
    pytest_args = [test_dir] + sys.argv[1:]
    sys.exit(pytest.main(pytest_args))

0 comments on commit 16a51f9

Please sign in to comment.