[TVM PyTorch Integration] libstdc++ CXX11 ABI Compatibility & boolean tensor support #12232

Merged (35 commits) on Aug 17, 2022
Changes from 2 commits
66 changes: 24 additions & 42 deletions src/contrib/torch/tvm_module_wrapper/RuntimeModuleWrapperTVM.cc
@@ -91,12 +91,6 @@ tvm::runtime::Module deserialize(std::string state) {
return ret;
}

tvm::Device getDeviceInfo(DLManagedTensor* input_device) {
tvm::Device ret{input_device->dl_tensor.device.device_type,
input_device->dl_tensor.device.device_id};
return ret;
}

TVM_REGISTER_GLOBAL("tvmtorch.save_runtime_mod").set_body_typed([](tvm::runtime::Module mod) {
ThreadLocalStore::ThreadLocal()->mod = mod;
});
@@ -106,7 +100,8 @@ TVM_REGISTER_GLOBAL("tvmtorch.save_runtime_mod").set_body_typed([](tvm::runtime:
* @param src Pointer to NDArray
* @return DLPack extended tensor
*/
DLPackTensorExt create_dlpack_tensor_ext(tvm::runtime::NDArray* src, bool is_bool) {
DLPackTensorExt createDLpackTensorExt(tvm::runtime::NDArray* src) {
auto is_bool = src->DataType().is_bool();
DLManagedTensor* tensor;
if (is_bool) {
// If we change DLDataType{kDLInt, 8, 1} to DataType::Bool()
@@ -121,53 +116,41 @@ DLPackTensorExt create_dlpack_tensor_ext(tvm::runtime::NDArray* src, bool is_boo
}

/*
* Create an empty NDArray with boolean type.
* Create an NDArray with boolean type. (One memory copy)
* @param src DLpack extended tensor
* @return an empty NDArray
* @return a new NDArray
*/
tvm::runtime::NDArray create_empty_bool_ndarray(DLPackTensorExt* src) {
tvm::runtime::NDArray createBoolNDarray(DLPackTensorExt* src) {
auto& tensor = src->dl_managed_tensor->dl_tensor;
std::vector<int64_t> shape;
for (int64_t i = 0; i < tensor.ndim; i++) {
shape.push_back(tensor.shape[i]);
}
auto ret = tvm::runtime::NDArray::Empty(shape, DataType::Bool(), tensor.device);
return ret;
}

/*
* Create an NDArray with boolean type. (One memory copy)
* @param src DLpack extended tensor
* @return a new NDArray
*/
tvm::runtime::NDArray create_bool_ndarray(DLPackTensorExt* src) {
auto&& ret = create_empty_bool_ndarray(src);
ret.CopyFrom(&src->dl_managed_tensor->dl_tensor);
return std::move(ret);
}

bool is_zero_copy(DLPackTensorExt* src) {
bool isZeroCopy(DLPackTensorExt* src) {
auto& dl_tensor = src->dl_managed_tensor->dl_tensor;
bool is_zero_copy =
tvm::runtime::NDArray::AbilityOfZeroCopyForDLTensor(&dl_tensor, dl_tensor.device);
return is_zero_copy;
return tvm::runtime::NDArray::AbilityOfZeroCopyForDLTensor(&dl_tensor, dl_tensor.device);
}

/*
* Create an NDArray from DLpack extended tensor.
* @param src DLpack extended tensor
* @return a new NDArray
*/
tvm::runtime::NDArray ndarray_from_dlpack(DLPackTensorExt* src) {
tvm::runtime::NDArray ndarrayFromDLpack(DLPackTensorExt* src) {
using tvm::runtime::NDArray;

NDArray array;
auto& dl_tensor = src->dl_managed_tensor->dl_tensor;
if (src->is_bool) {
// one memory copy
// the code is similar to NewFromDLTensor except for the type
array = create_bool_ndarray(src);
} else if (is_zero_copy(src)) {
array = createBoolNDarray(src);
} else if (isZeroCopy(src)) {
array = NDArray::FromExternalDLTensor(src->dl_managed_tensor->dl_tensor);
} else {
// one memory copy
@@ -187,8 +170,8 @@ struct TVMContribTorchRuntimeModule {
explicit TVMContribTorchRuntimeModule(tvm::runtime::Module& mod) : mod(mod) {}
};

bool tvm_contrib_torch_is_be_copied(DLPackTensorExt* src) {
return (src->is_bool) || (!tvm::contrib::is_zero_copy(src));
bool tvm_contrib_torch_tensor_ability_of_zero_copy(DLPackTensorExt* src) {
return (src->is_bool) || (!tvm::contrib::isZeroCopy(src));
}

TVMContribTorchRuntimeModule* tvm_contrib_torch_get_last_saved_runtime_module() {
@@ -205,8 +188,8 @@ void tvm_contrib_torch_operator_module_forward(TVMContribTorchRuntimeModule* run

std::vector<tvm::runtime::NDArray> input_cache(input_size);

for (int k = 0; k < input_size; ++k) {
auto datum = tvm::contrib::ndarray_from_dlpack(&inputs[k]); // could have one memory copy
for (size_t k = 0; k < input_size; ++k) {
auto datum = tvm::contrib::ndarrayFromDLpack(&inputs[k]); // could have one memory copy
input_cache[k] = datum; // we keep the datum in a vector for future use, otherwise the datum
// will be freed after the loop
setter(k, datum);
@@ -215,16 +198,16 @@
run.CallPacked(tvm::runtime::TVMArgs(tvm_values.data(), tvm_type_codes.data(), input_size),
nullptr);

for (int k = 0; k < input_size; ++k) {
if (tvm_contrib_torch_is_be_copied(&inputs[k]))
for (size_t k = 0; k < input_size; ++k) {
if (tvm_contrib_torch_tensor_ability_of_zero_copy(&inputs[k]))
input_cache[k].CopyTo(&inputs[k].dl_managed_tensor->dl_tensor);
}
}

TVMContribTorchRuntimeModule* tvm_contrib_torch_create_graph_runtime_module(
TVMContribTorchRuntimeModule* graph_module, DLManagedTensor* input_example) {
tvm::runtime::PackedFunc built_module = graph_module->mod.GetFunction("default");
tvm::Device device_info = tvm::contrib::getDeviceInfo(input_example);
TVMContribTorchRuntimeModule* graph_executor_factory, DLManagedTensor* input_example) {
tvm::runtime::PackedFunc built_module = graph_executor_factory->mod.GetFunction("default");
tvm::Device device_info = input_example->dl_tensor.device;
tvm::runtime::Module runtime_module = built_module(device_info);
return new TVMContribTorchRuntimeModule(runtime_module);
}
@@ -237,7 +220,7 @@ size_t tvm_contrib_torch_graph_executor_module_forward(TVMContribTorchRuntimeMod
tvm::runtime::PackedFunc get_output = runtime_module->mod.GetFunction("get_output");
tvm::runtime::PackedFunc get_num_outputs = runtime_module->mod.GetFunction("get_num_outputs");

for (int k = 0; k < input_size; ++k) {
for (size_t k = 0; k < input_size; ++k) {
set_input(k, &inputs[k].dl_managed_tensor->dl_tensor);
}

@@ -248,10 +231,9 @@ size_t tvm_contrib_torch_graph_executor_module_forward(TVMContribTorchRuntimeMod
DLPackTensorExt* outputs_ptr = new DLPackTensorExt[output_length];
*outputs = outputs_ptr;

for (int k = 0; k < output_length; ++k) {
for (int64_t k = 0; k < output_length; ++k) {
tvm::runtime::NDArray results = get_output(k);
bool is_bool = results.DataType().is_bool();
outputs_ptr[k] = tvm::contrib::create_dlpack_tensor_ext(&results, is_bool);
outputs_ptr[k] = tvm::contrib::createDLpackTensorExt(&results);
}

return output_length;
@@ -274,8 +256,8 @@ void tvm_contrib_torch_free_runtime_module(TVMContribTorchRuntimeModule* module_
}

void tvm_contrib_torch_free_dlpack_tensor_ext_array(DLPackTensorExt* dlpack_ptr) {
delete dlpack_ptr;
delete[] dlpack_ptr;
}

void tvm_contrib_torch_free_encoding(char* encoding) { delete encoding; }
void tvm_contrib_torch_free_encoding(char* encoding) { delete[] encoding; }
}
src/contrib/torch/tvm_module_wrapper/RuntimeModuleWrapperTorch.cc
@@ -85,7 +85,7 @@ class OperatorModuleWrapper : public torch::jit::CustomClassHolder {
tensors.size());

for (int k = 0; k < input_length; ++k) {
if (tvm_contrib_torch_is_be_copied(&tensors[k])) {
if (tvm_contrib_torch_tensor_ability_of_zero_copy(&tensors[k])) {
inputs[k].copy_(fromDLPackExt(tensors[k]));
} else {
tensors[k].dl_managed_tensor->deleter(tensors[k].dl_managed_tensor);
@@ -163,7 +163,7 @@ class GraphExecutorFactoryWrapper : public torch::jit::CustomClassHolder {
c10::List<at::Tensor> ret;
ret.reserve(num_outputs);

for (int k = 0; k < num_outputs; ++k) {
for (size_t k = 0; k < num_outputs; ++k) {
at::Tensor atTensor = fromDLPackExt(outputs[k]);
ret.emplace_back(atTensor);
}
20 changes: 11 additions & 9 deletions src/contrib/torch/tvm_module_wrapper/runtime_bridge.h
@@ -27,8 +27,10 @@ extern "C" {

/*
 * DLPack data structure extended with an `is_bool` flag.
 * DLPack does not support boolean tensors yet,
 * thus a boolean tensor will be regarded as a UInt8 tensor.
 * DLPack does not support boolean tensors yet
 * (https://github.com/pytorch/pytorch/blob/4618371da56c887195e2e1d16dad2b9686302800/aten/src/ATen/DLConvertor.cpp#L42),
 * thus a boolean tensor will be regarded as a UInt8 tensor
 * (https://github.com/apache/tvm/blob/de124862714e747764aa8b7f41a90bcb25f3c6a8/python/tvm/_ffi/runtime_ctypes.py#L91).
*/
struct DLPackTensorExt {
DLManagedTensor* dl_managed_tensor;
@@ -53,12 +55,12 @@ void tvm_contrib_torch_free_runtime_module(TVMContribTorchRuntimeModule* module_

/*
* Obtain ExecutorFactory runtime module from ExecutorFactory class.
* @param graph_module ExecutorFactory class
* @param graph_executor_factory ExecutorFactory class
* @param input_example For obtaining device information
* @return ExecutorFactory TVM runtime module wrapper
*/
TVMContribTorchRuntimeModule* tvm_contrib_torch_create_graph_runtime_module(
TVMContribTorchRuntimeModule* graph_module, DLManagedTensor* input_example);
TVMContribTorchRuntimeModule* graph_executor_factory, DLManagedTensor* input_example);

/*
* Forward method for OperatorModuleWrapper.
@@ -71,15 +73,15 @@ void tvm_contrib_torch_operator_module_forward(TVMContribTorchRuntimeModule* run

/*
* Forward method for GraphExecutorFactoryWrapper.
* @param graph_module TVM runtime module wrapper
* @param graph_executor_factory TVM runtime module wrapper
* @param inputs Array pointer of the input tensors
* @param input_size The number of input tensors
* @param outputs The resulting output tensors pointer
* @return The number of output tensors
*/
size_t tvm_contrib_torch_graph_executor_module_forward(TVMContribTorchRuntimeModule* graph_module,
DLPackTensorExt* inputs, size_t input_size,
DLPackTensorExt** outputs);
size_t tvm_contrib_torch_graph_executor_module_forward(
TVMContribTorchRuntimeModule* graph_executor_factory, DLPackTensorExt* inputs,
size_t input_size, DLPackTensorExt** outputs);

/*
* Encode TVM runtime module.
@@ -108,7 +110,7 @@ void tvm_contrib_torch_free_encoding(char* encoding);
/*
 * Check whether a DLPackTensorExt holds a boolean tensor or cannot be copied at zero cost.
*/
bool tvm_contrib_torch_is_be_copied(DLPackTensorExt*);
bool tvm_contrib_torch_tensor_ability_of_zero_copy(DLPackTensorExt*);
}

#endif // TVM_CONTRIB_TORCH_TVM_MODULE_WRAPPER_RUNTIME_BRIDGE_H_