forked from apache/tvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add initial support for quantized transpose convolution in Relay
This work is based on @jainris initial PR: apache#6523 I added a relay.qnn.conv2d_transpose node. The strategy I followed is to convert to int16 and invoke nn.conv2d_transpose (which already exists in relay). Main changes: - The node declaration lives in relay/qnn/op/convolution_transpose.cc - Cast int8->int16 and subsequent offset removal is in tvm/relay/qnn/op/legalizations.py. - I added and tested the operator in the tflite front-end - I added a unit-test in Relay for qnn.conv2d_transpose Co-authored-by: Rishabh Jain
- Loading branch information
Giuseppe Rossini
committed
Nov 11, 2020
1 parent
b7318a7
commit 6da5514
Showing
6 changed files
with
1,084 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
/*! | ||
* \file src/relay/qnn/op/convolution.cc | ||
* \brief Property def of qnn convolution operator. | ||
*/ | ||
#include <tvm/relay/analysis.h> | ||
#include <tvm/relay/base.h> | ||
#include <tvm/relay/op.h> | ||
#include <tvm/relay/qnn/attrs.h> | ||
#include <tvm/relay/transform.h> | ||
#include <tvm/tir/analysis.h> | ||
#include <tvm/tir/data_layout.h> | ||
|
||
#include "../../op/nn/convolution.h" | ||
#include "../../transforms/pattern_utils.h" | ||
#include "../utils.h" | ||
|
||
namespace tvm { | ||
namespace relay { | ||
namespace qnn { | ||
|
||
// relay.op.qnn.conv2d_transpose | ||
|
||
inline Expr MakeQnnConv2DTranspose(Expr data, Expr weight, Expr input_zero_point, | ||
Expr kernel_zero_point, Expr input_scale, Expr kernel_scale, | ||
Array<IndexExpr> strides, Array<IndexExpr> padding, | ||
Array<IndexExpr> dilation, int groups, IndexExpr channels, | ||
Array<IndexExpr> kernel_size, std::string data_layout, | ||
std::string kernel_layout, std::string out_layout, | ||
Array<IndexExpr> output_padding, DataType out_dtype) { | ||
auto attrs = make_object<Conv2DTransposeAttrs>(); | ||
attrs->strides = std::move(strides); | ||
attrs->padding = std::move(padding); | ||
attrs->dilation = std::move(dilation); | ||
attrs->groups = groups; | ||
attrs->channels = std::move(channels); | ||
attrs->kernel_size = std::move(kernel_size); | ||
attrs->data_layout = std::move(data_layout); | ||
attrs->kernel_layout = std::move(kernel_layout); | ||
attrs->out_layout = std::move(out_layout); | ||
attrs->output_padding = std::move(output_padding); | ||
attrs->out_dtype = std::move(out_dtype); | ||
const Op& op = Op::Get("qnn.conv2d_transpose"); | ||
return Call(op, {data, weight, input_zero_point, kernel_zero_point, input_scale, kernel_scale}, | ||
Attrs(attrs), {}); | ||
} | ||
|
||
Array<Array<Layout>> QnnConvTransposeInferCorrectLayout( | ||
const Attrs& attrs, const Array<Layout>& new_in_layouts, const Array<Layout>& old_in_layouts, | ||
const Array<tvm::relay::Type>& old_in_types) { | ||
// Use Relay Conv2D Infer correct layout. | ||
auto layouts = ConvInferCorrectLayout<Conv2DTransposeAttrs>(attrs, new_in_layouts, old_in_layouts, | ||
old_in_types); | ||
|
||
// Fill the layouts of remaining input tensors - scales and zero points. The layouts of these | ||
// tensors can be treated as channel layout. | ||
Layout channel_layout = Layout("C"); | ||
Array<Layout> input_layouts = {layouts[0][0], layouts[0][1], channel_layout, | ||
channel_layout, channel_layout, channel_layout}; | ||
Array<Layout> output_layouts = layouts[1]; | ||
return {input_layouts, output_layouts}; | ||
} | ||
|
||
bool QnnConv2DTransposeRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, | ||
const TypeReporter& reporter) { | ||
ICHECK_EQ(types.size(), 7); | ||
const auto* data = types[0].as<TensorTypeNode>(); | ||
const auto* weight = types[1].as<TensorTypeNode>(); | ||
if (data == nullptr || weight == nullptr) return false; | ||
const auto* param = attrs.as<Conv2DTransposeAttrs>(); | ||
ICHECK(param != nullptr) << "Conv2DTransposeAttrs cannot be nullptr."; | ||
ICHECK(data->dtype == DataType::Int(8) || data->dtype == DataType::UInt(8)) | ||
<< "Expected qnn conv2d type(int8, uint8) for input but was " << data->dtype; | ||
ICHECK(weight->dtype == DataType::Int(8) || weight->dtype == DataType::UInt(8)) | ||
<< "Expected qnn conv2d type(int8, uint8) for weight but was " << weight->dtype; | ||
ICHECK(param->out_dtype == DataType::Int(16) || param->out_dtype == DataType::Int(32)) | ||
<< "Expected qnn conv2d type(int32, int16) for output but was " << param->out_dtype; | ||
ICHECK(param->out_dtype.bits() > 0) << "Output dtype bits should be greater than 0."; | ||
|
||
// Check the types of scale and zero points. | ||
ICHECK(IsScalarType(types[2], DataType::Int(32))); // input_zero_point | ||
ICHECK(IsScalarType(types[3], DataType::Int(32))); // kernel_zero_point | ||
ICHECK(IsScalarType(types[4], DataType::Float(32))); // input_scale | ||
// Kernel scale can be a vector of length output_channels or a scalar. | ||
if (param->groups == 1) { | ||
size_t axis = param->kernel_layout.find('O'); | ||
ICHECK(axis != std::string::npos) << "Kernel layout attribute is not defined"; | ||
AssignType(types[5], DataType::Float(32), weight->shape[axis], reporter); // kernel scale | ||
} else { | ||
// Here, total number of output channels depend on depth multiplier. | ||
size_t o_axis = param->kernel_layout.find('O'); | ||
size_t i_axis = param->kernel_layout.find('I'); | ||
ICHECK(o_axis != std::string::npos || i_axis != std::string::npos) | ||
<< "Kernel layout attribute is not defined"; | ||
AssignType(types[5], DataType::Float(32), weight->shape[i_axis] * weight->shape[o_axis], | ||
reporter); // kernel scale | ||
} | ||
|
||
// Collect the input tensor and output tensor devoid of scale and zero points to reuse Relay | ||
// Conv2D infer type function. | ||
Array<Type> tensor_types = {types[0], types[1], types[6]}; | ||
return Conv2DTransposeRel<Conv2DTransposeAttrs>(tensor_types, 3, attrs, reporter); | ||
} | ||
|
||
RELAY_REGISTER_OP("qnn.conv2d_transpose") | ||
.describe(R"code(Quantized transposed 2D convolution layer (sometimes called Deconvolution). | ||
This operator deconvolves quantized weight with quantized data. The scale of the | ||
output quantized tensor is the product of the weight_scale and input_scale of | ||
the input quantized tensors. The zero point of the output quantized tensor is | ||
0. By default, the dtype of output is int32. Please also refer to Requantize | ||
operator to understand how to scale back the int32 output to (u)int8. | ||
- **data**: This depends on the `layout` parameter. Input is 4D array of shape | ||
(batch_size, in_channels, height, width) if `layout` is `NCHW`. | ||
- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1]) | ||
- **out**: This depends on the `layout` parameter. Output is 4D array of shape | ||
(batch_size, channels, out_height, out_width) if `layout` is `NCHW`. | ||
)code" TVM_ADD_FILELINE) | ||
.set_attrs_type<Conv2DTransposeAttrs>() | ||
.set_num_inputs(6) | ||
.add_argument("data", "Tensor", "The quantized input data tensor.") | ||
.add_argument("weight", "Tensor", "The quantized weight tensor.") | ||
.add_argument("input_scale", "Tensor", "The quantization scale of the input tensor.") | ||
.add_argument("input_zero_point", "Tensor", "The quantization zero_point of the input tensor.") | ||
.add_argument("weight_scale", "Tensor", "The quantization scale of the weight tensor.") | ||
.add_argument("weight_zero_point", "Tensor", | ||
"The quantization zero_point of the weight tensor.") | ||
.set_support_level(11) | ||
.add_type_rel("QnnConv2DTranspose", QnnConv2DTransposeRel) | ||
.set_attr<TNonComputational>("TNonComputational", true) | ||
.set_attr<FInferCorrectLayout>("FInferCorrectLayout", QnnConvTransposeInferCorrectLayout); | ||
|
||
TVM_REGISTER_GLOBAL("relay.qnn.op._make.conv2d_transpose").set_body_typed(MakeQnnConv2DTranspose); | ||
|
||
} // namespace qnn | ||
} // namespace relay | ||
} // namespace tvm |
Oops, something went wrong.