Commit e5d80e5

respond to review comments

Matthew committed Sep 7, 2021
1 parent 298b81d commit e5d80e5
Showing 5 changed files with 17 additions and 7 deletions.
2 changes: 1 addition & 1 deletion include/tvm/ir/affine_type.h
@@ -71,7 +71,7 @@ class TensorAffineTypeNode : public AffineTypeNode {
   RelayExpr zero_point;
   /*! \brief The data type of this type */
   DataType dtype;
-  /*! \brief The data type of this type */
+  /*! \brief The axis for per-channel quantization */
   int axis;
 
   void VisitAttrs(tvm::AttrVisitor* v) {
3 changes: 3 additions & 0 deletions python/tvm/ir/affine_type.py
@@ -48,6 +48,9 @@ class TensorAffineType(AffineType):
     dtype : str
         The content data type.
+
+    axis : int
+        The axis for per-channel quantization.
     """
 
     def __init__(self, scale, zero_point, dtype, axis=-1):
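For reference, a minimal usage sketch (illustrative values, not part of this commit) showing how the newly documented axis argument is passed when building a per-channel TensorAffineType:

import numpy as np
from tvm import relay
from tvm.ir.affine_type import TensorAffineType

# One scale/zero-point pair per channel along axis 0 of an int8 tensor.
scale = relay.const(np.array([0.25, 0.5, 1.0], dtype="float32"))
zero_point = relay.const(np.array([0, 0, 0], dtype="int32"))
per_channel_type = TensorAffineType(scale, zero_point, "int8", axis=0)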
9 changes: 7 additions & 2 deletions python/tvm/relay/qnn/op/qnn.py
@@ -276,8 +276,10 @@ def conv2d(
 ):
     r"""Quantized 2D convolution.
-    This operator convolves quantized data with quantized kernel. The scale of
-    the output quantized tensor is the product of the kernel_scale and
+    This operator convolves quantized data with quantized kernel.
+    If doing per-channel quantization, qnn expects the kernel_scale
+    and optionally the kernel_zero_point to be 1-D vectors instead of scalars.
+    The scale of the output quantized tensor is the product of the kernel_scale and
     input_scale of the input quantized tensors. The zero point of the output
     quantized tensor is 0. By default, the dtype of output is int32. Please also
     refer to Requantize operator to understand how to scale back the int32
@@ -544,6 +546,9 @@ def dense(
     `Y = X * W`
+
+    If doing per-channel quantization, qnn expects the kernel_scale
+    and optionally the kernel_zero_point to be 1-D vectors instead of scalars.
     Parameters
     ----------
     data : tvm.relay.Expr
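For reference, a short sketch of the per-channel convention these docstrings now describe (shapes and values are illustrative, not from this commit): kernel_scale is a 1-D vector with one entry per output channel, while input_scale and both zero points stay scalar.

import numpy as np
from tvm import relay

data = relay.var("data", shape=(1, 3, 32, 32), dtype="int8")
weight = relay.var("weight", shape=(8, 3, 3, 3), dtype="int8")

# Per-channel quantized conv2d: kernel_scale carries one entry per
# output channel (8 here); the zero points remain scalar.
out = relay.qnn.op.conv2d(
    data,
    weight,
    input_zero_point=relay.const(0, "int32"),
    kernel_zero_point=relay.const(0, "int32"),
    input_scale=relay.const(0.5, "float32"),
    kernel_scale=relay.const(np.full(8, 0.25, dtype="float32")),
    kernel_size=(3, 3),
    channels=8,
)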
9 changes: 5 additions & 4 deletions python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -255,12 +255,13 @@ def relu(expr, type_map):
     arg = expr.args[0]
     t = type_map[arg]
     scale_shape = infer_shape(t.scale)
-    zero = relay.const(0, dtype="float32")
-    if len(scale_shape) > 0 and scale_shape[0] > 1:
+    z_p = t.zero_point
+    assert len(scale_shape) <= 1
+    if len(scale_shape) == 1 and scale_shape[0] > 1:
         b_shape = [1] * len(infer_shape(arg))
         b_shape[t.axis] = -1
-        zero = relay.op.reshape(relay.op.broadcast_to(zero, scale_shape), b_shape)
-        zero = relay.qnn.op.quantize(zero, t.scale, t.zero_point, t.axis, t.dtype)
+        z_p = relay.op.reshape(relay.op.broadcast_to(z_p, scale_shape), b_shape)
+    zero = relay.op.cast(z_p, t.dtype)
     return [relay.op.maximum(arg, fold_constant(zero)), t]


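To make the rewritten branch concrete, a standalone sketch of the broadcast/reshape step (names and shapes are illustrative, not from this commit): the per-channel zero point is reshaped so it broadcasts along the channel axis, and quantized relu becomes a maximum against that zero point rather than against a freshly quantized zero.

import numpy as np
from tvm import relay

act = relay.var("act", shape=(1, 3, 8, 8), dtype="int8")      # NCHW activation
zero_point = relay.const(np.array([3, 0, 7], dtype="int32"))  # one per channel

b_shape = [1] * 4       # rank of the activation
b_shape[1] = -1         # channel axis -> [1, -1, 1, 1]

# Broadcast to the scale's shape, then reshape so maximum() broadcasts
# the per-channel zero point across H and W.
z_p = relay.op.reshape(relay.op.broadcast_to(zero_point, (3,)), b_shape)
zero = relay.op.cast(z_p, "int8")   # match the quantized tensor's dtype
out = relay.op.maximum(act, zero)   # quantized relu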
1 change: 1 addition & 0 deletions src/relay/qnn/op/dense.cc
@@ -62,6 +62,7 @@ bool QnnDenseRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
   }
   ICHECK(IsScalarType(types[2], DataType::Int(32)));    // input_zero_point
   ICHECK(IsScalarType(types[4], DataType::Float(32)));  // input_scale
+  // weight_zero_point can be a scalar or a vector of the same shape as the weight_scale
   AssignType(types[5], DataType::Float(32), param->units, reporter);  // weight_scale
 
   ICHECK(param->out_dtype.bits() > 0) << "Output dtype bits should be greater than 0.";
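The relaxed type relation can be exercised from the Python side; a sketch under assumed shapes (not from this commit) where kernel_scale is a vector of length units and the zero points remain scalar:

import numpy as np
from tvm import relay

data = relay.var("data", shape=(4, 16), dtype="int8")
weight = relay.var("weight", shape=(8, 16), dtype="int8")

# kernel_scale is a 1-D vector of length `units`; QnnDenseRel's AssignType
# accepts either a scalar or such a vector for types[5].
out = relay.qnn.op.dense(
    data,
    weight,
    input_zero_point=relay.const(0, "int32"),
    kernel_zero_point=relay.const(0, "int32"),
    input_scale=relay.const(0.1, "float32"),
    kernel_scale=relay.const(np.full(8, 0.05, dtype="float32")),
    units=8,
)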
