-
Notifications
You must be signed in to change notification settings - Fork 3.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BYOC][TENSORRT] Add support for FP16 on TensorRT BYOC flow #10388
Changes from 4 commits
e36ceb0
06d8a2c
2c19d92
d357c32
5bdd0ed
422ae09
d0e508b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -85,8 +85,10 @@ void TensorRTBuilder::AddInput(int nid, uint32_t entry_id, const JSONGraphNode& | |
shape.erase(shape.begin()); | ||
} | ||
nvinfer1::Dims dims = VectorToTrtDims(shape); | ||
ICHECK(TypeMatch(dtypes[i], kDLFloat, 32)) << "Only FP32 inputs are supported."; | ||
auto input_tensor = network_->addInput(name.c_str(), nvinfer1::DataType::kFLOAT, dims); | ||
auto tensor_dtype = | ||
(dtypes[i].bits == 16) ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd suggest failing an ICHECK if the dtype is unsupported. |
||
|
||
auto input_tensor = network_->addInput(name.c_str(), tensor_dtype, dims); | ||
node_output_map_[nid].push_back(TensorRTOpInput(input_tensor)); | ||
network_input_names_.push_back(name); | ||
entry_id_map_[name] = entry_id + i; | ||
|
@@ -139,17 +141,21 @@ void TensorRTBuilder::AddLayer(int nid, const JSONGraphNode& node) { | |
<< " requires weights but got a tensor."; | ||
} | ||
} | ||
VLOG(1) << "INT " << input.type; | ||
mikepapadim marked this conversation as resolved.
Show resolved
Hide resolved
|
||
params.inputs.push_back(input); | ||
} | ||
ICHECK(converter->variable_input_count || converter->input_types.size() == params.inputs.size()) | ||
<< "Op expected a different number of inputs."; | ||
|
||
// Convert op to TRT. | ||
converter->Convert(&params); | ||
|
||
// Get outputs. | ||
node_output_map_[nid] = {}; | ||
for (auto out : params.outputs) { | ||
auto out_type = params.inputs.at(1).weight.type == params.inputs.at(0).tensor->getType() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you explain this? It seems very specific, yet AddLayer is used for all of the supported ops. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is unfortunately causing a vector index exception for me. I believe we need to pick up the output type from the node's dtype vector. |
||
? params.inputs.at(0).tensor->getType() | ||
: params.inputs.at(1).weight.type; | ||
out->setType(out_type); | ||
|
||
node_output_map_[nid].push_back(TensorRTOpInput(out)); | ||
} | ||
} | ||
|
@@ -205,18 +211,16 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() { | |
nvinfer1::Weights TensorRTBuilder::GetDLTensorAsWeights(const DLTensor* dptr, | ||
DLDeviceType src_device) { | ||
ICHECK_EQ(dptr->device.device_type, src_device); | ||
ICHECK(static_cast<int>(dptr->dtype.code) == kDLFloat || | ||
static_cast<int>(dptr->dtype.code) == kDLInt); | ||
const auto trt_dtype = static_cast<int>(dptr->dtype.code) == kDLFloat | ||
? nvinfer1::DataType::kFLOAT | ||
: nvinfer1::DataType::kINT32; | ||
|
||
const auto trt_dtype = (static_cast<int>(dptr->dtype.bits) == 16) ? nvinfer1::DataType::kHALF | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Another ICHECK would be in order to make sure we're not silently generating bad code. |
||
: nvinfer1::DataType::kFLOAT; | ||
|
||
const size_t weight_bytes = GetDataSize(*dptr); | ||
nvinfer1::Weights weight{trt_dtype, nullptr, 0}; | ||
size_t count = 1; | ||
for (tvm_index_t i = 0; i < dptr->ndim; ++i) { | ||
count *= dptr->shape[i]; | ||
} | ||
ICHECK_EQ(count * 4, weight_bytes); | ||
weight.count = count; | ||
weight.values = new float[count]; | ||
ICHECK_EQ(TVMArrayCopyToBytes(const_cast<DLTensor*>(dptr), const_cast<void*>(weight.values), | ||
|
@@ -250,7 +254,7 @@ void TensorRTBuilder::CleanUp() { | |
#endif | ||
builder_->destroy(); | ||
for (auto weight : trt_weights_) { | ||
if (weight.type == nvinfer1::DataType::kFLOAT) { | ||
if (static_cast<int>(weight.type) <= 1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we avoid hard-coding the enum constants? |
||
delete[] static_cast<const float*>(weight.values); | ||
} else { | ||
delete[] static_cast<const uint16_t*>(weight.values); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not seeing where the type check (which must now be generalized to float32/float16) has gone to. If we remove it altogether then I think we'll either generate bad code or fail at TRT build time, which from the TVM user's point of view is runtime and too late. We also need to check in the predicate to prevent Collage from exploring invalid candidate kernels.