-
Notifications
You must be signed in to change notification settings - Fork 3.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BYOC][TENSORRT] Add support for FP16 on TensorRT BYOC flow #10388
Changes from all commits
e36ceb0
06d8a2c
2c19d92
d357c32
5bdd0ed
422ae09
d0e508b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -85,8 +85,13 @@ void TensorRTBuilder::AddInput(int nid, uint32_t entry_id, const JSONGraphNode& | |
shape.erase(shape.begin()); | ||
} | ||
nvinfer1::Dims dims = VectorToTrtDims(shape); | ||
ICHECK(TypeMatch(dtypes[i], kDLFloat, 32)) << "Only FP32 inputs are supported."; | ||
auto input_tensor = network_->addInput(name.c_str(), nvinfer1::DataType::kFLOAT, dims); | ||
ICHECK((dtypes[i].bits != 16 || dtypes[i].bits != 32)) | ||
<< "Invalid input Tensor type. Float16 and Float32 are supported"; | ||
|
||
auto tensor_dtype = | ||
(dtypes[i].bits == 16) ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd suggest failing an ICHECK if the type is unsupported. |
||
|
||
auto input_tensor = network_->addInput(name.c_str(), tensor_dtype, dims); | ||
node_output_map_[nid].push_back(TensorRTOpInput(input_tensor)); | ||
network_input_names_.push_back(name); | ||
entry_id_map_[name] = entry_id + i; | ||
|
@@ -141,15 +146,18 @@ void TensorRTBuilder::AddLayer(int nid, const JSONGraphNode& node) { | |
} | ||
params.inputs.push_back(input); | ||
} | ||
ICHECK(converter->variable_input_count || converter->input_types.size() == params.inputs.size()) | ||
<< "Op expected a different number of inputs."; | ||
|
||
// Convert op to TRT. | ||
converter->Convert(&params); | ||
|
||
// Get outputs. | ||
node_output_map_[nid] = {}; | ||
for (auto out : params.outputs) { | ||
auto out_type = params.inputs.at(1).weight.type == params.inputs.at(0).tensor->getType() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you explain this? It seems very specific, yet AddLayer is used for all of the supported ops. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is unfortunately causing a vector index exception for me. I believe we need to pick up the output type from the node's dtype vector. |
||
? params.inputs.at(0).tensor->getType() | ||
: params.inputs.at(1).weight.type; | ||
out->setType(out_type); | ||
|
||
node_output_map_[nid].push_back(TensorRTOpInput(out)); | ||
} | ||
} | ||
|
@@ -205,18 +213,17 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() { | |
nvinfer1::Weights TensorRTBuilder::GetDLTensorAsWeights(const DLTensor* dptr, | ||
DLDeviceType src_device) { | ||
ICHECK_EQ(dptr->device.device_type, src_device); | ||
ICHECK(static_cast<int>(dptr->dtype.code) == kDLFloat || | ||
static_cast<int>(dptr->dtype.code) == kDLInt); | ||
const auto trt_dtype = static_cast<int>(dptr->dtype.code) == kDLFloat | ||
? nvinfer1::DataType::kFLOAT | ||
: nvinfer1::DataType::kINT32; | ||
ICHECK((dptr->dtype.bits != 16 || dptr->dtype.bits != 32)) | ||
<< "Invalid input Tensor type. Float16 and Float32 are supported"; | ||
const auto trt_dtype = (static_cast<int>(dptr->dtype.bits) == 16) ? nvinfer1::DataType::kHALF | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Another ICHECK would be in order to make sure we're not silently generating bad code. |
||
: nvinfer1::DataType::kFLOAT; | ||
|
||
const size_t weight_bytes = GetDataSize(*dptr); | ||
nvinfer1::Weights weight{trt_dtype, nullptr, 0}; | ||
size_t count = 1; | ||
for (tvm_index_t i = 0; i < dptr->ndim; ++i) { | ||
count *= dptr->shape[i]; | ||
} | ||
ICHECK_EQ(count * 4, weight_bytes); | ||
weight.count = count; | ||
weight.values = new float[count]; | ||
ICHECK_EQ(TVMArrayCopyToBytes(const_cast<DLTensor*>(dptr), const_cast<void*>(weight.values), | ||
|
@@ -250,7 +257,7 @@ void TensorRTBuilder::CleanUp() { | |
#endif | ||
builder_->destroy(); | ||
for (auto weight : trt_weights_) { | ||
if (weight.type == nvinfer1::DataType::kFLOAT) { | ||
if (weight.type == nvinfer1::DataType::kFLOAT || weight.type == nvinfer1::DataType::kHALF) { | ||
delete[] static_cast<const float*>(weight.values); | ||
} else { | ||
delete[] static_cast<const uint16_t*>(weight.values); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This condition is always true; I think you mean bits == 16 || bits == 32.