Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CMSIS-NN][Perf] Converted Relay Conv2D into CMSIS-NN Depthwise #12006

Merged
merged 3 commits into from
Jul 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions apps/microtvm/zephyr_cmsisnn/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ set(DATA_FILES
)
set(CMSIS_SOURCES
${CMSIS_PATH}/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c
${CMSIS_PATH}/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
Expand Down
46 changes: 46 additions & 0 deletions src/relay/backend/contrib/cmsisnn/convolutions.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "convolutions.h"

#include <string>

#include "../../../qnn/utils.h"
#include "tvm/ir/transform.h"
#include "tvm/relay/attrs/nn.h"

namespace tvm {
namespace relay {
namespace contrib {
namespace cmsisnn {

bool IsCMSISNNDepthwise(const Conv2DAttrs* conv2d_attrs, const Array<PrimExpr>& input_shape,
const Array<PrimExpr>& kernel_shape) {
std::string kernel_layout = conv2d_attrs->kernel_layout.c_str();
int kernel_pos_o = kernel_layout.find("O");
int kernel_pos_i = kernel_layout.find("I");
int kernel_dim_o_val = qnn::get_const_int(kernel_shape[kernel_pos_o]);
int kernel_dim_i_val = qnn::get_const_int(kernel_shape[kernel_pos_i]);
int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
return (out_channels == kernel_dim_o_val * kernel_dim_i_val);
}

} // namespace cmsisnn
} // namespace contrib
} // namespace relay
} // namespace tvm
60 changes: 60 additions & 0 deletions src/relay/backend/contrib/cmsisnn/convolutions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file src/relay/backend/contrib/cmsisnn/convolutions.h
* \brief CMSIS-NN utility functions for Convolutions
*/

#ifndef TVM_RELAY_BACKEND_CONTRIB_CMSISNN_CONVOLUTIONS_H_
#define TVM_RELAY_BACKEND_CONTRIB_CMSISNN_CONVOLUTIONS_H_

#include <tvm/relay/attrs/nn.h>
#include <tvm/relay/attrs/transform.h>
#include <tvm/relay/expr_functor.h>
#include <tvm/relay/transform.h>
#include <tvm/runtime/ndarray.h>

#include "../../../op/make_op.h"
#include "../../../qnn/utils.h"
#include "../../../transforms/pattern_utils.h"

namespace tvm {
namespace relay {
namespace contrib {
namespace cmsisnn {
/*!
* \brief Checks if Relay Conv2D was originally CMSIS-NN compliant Depthwise Convolution
* See:
* https://github.com/apache/tvm/blob/6ed3ab3e33f8eafa4acaf53b7a671831de7587e9/python/tvm/relay/frontend/tflite.py#L2107
*
*
* \return true if a Conv2D is a Depthwise Convolution based on Conv2D's inputs' shapes and
* attributes
*/

bool IsCMSISNNDepthwise(const Conv2DAttrs* conv2d_attrs, const Array<PrimExpr>& input_shape,
const Array<PrimExpr>& kernel_shape);

} // namespace cmsisnn
} // namespace contrib
} // namespace relay
} // namespace tvm

#endif // TVM_RELAY_BACKEND_CONTRIB_CMSISNN_CONVOLUTIONS_H_
7 changes: 2 additions & 5 deletions src/relay/backend/contrib/cmsisnn/generate_constants.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "../../../op/make_op.h"
#include "../../../qnn/utils.h"
#include "../../../transforms/pattern_utils.h"
#include "convolutions.h"

namespace tvm {
namespace relay {
Expand Down Expand Up @@ -111,11 +112,7 @@ class GenerateConstantsMutator : public MixedModeMutator {

Array<PrimExpr> input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;
Array<PrimExpr> kernel_shape = conv2d_call->args[1]->type_as<TensorTypeNode>()->shape;
std::string kernel_layout = conv2d_attrs->kernel_layout.c_str();
int kernel_pos_o = kernel_layout.find("O");
int groups = conv2d_attrs->groups;
if (groups != qnn::get_const_int(input_shape[3]) ||
groups != qnn::get_const_int(kernel_shape[kernel_pos_o])) {
if (!IsCMSISNNDepthwise(conv2d_attrs, input_shape, kernel_shape)) {
// Transpose weights: HWIO -> OHWI for Conv2D
conv2d_kernel = ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
}
Expand Down
13 changes: 7 additions & 6 deletions src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
Expand Down Expand Up @@ -31,6 +30,7 @@
#include "../../../transforms/pattern_utils.h"
#include "buffer_size.h"
#include "compiler_attrs.h"
#include "convolutions.h"

namespace tvm {
namespace relay {
Expand Down Expand Up @@ -173,7 +173,6 @@ class RelayToTIRVisitor : public MixedModeMutator {
int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
int32_t out_channels = qnn::get_const_int(conv2d_attrs->channels);
int32_t groups = conv2d_attrs->groups;
std::string kernel_layout = conv2d_attrs->kernel_layout.c_str();
int32_t clip_min = std::numeric_limits<int8_t>::min();
int32_t clip_max = std::numeric_limits<int8_t>::max();
Expand Down Expand Up @@ -207,11 +206,13 @@ class RelayToTIRVisitor : public MixedModeMutator {
int32_t output_c = qnn::get_const_int(output_shape[3]);

int32_t depth_multiplier = -1;
int kernel_pos_o = kernel_layout.find("O");
if (groups == qnn::get_const_int(input_shape[3]) &&
groups == qnn::get_const_int(filter_shape[kernel_pos_o])) {
if (IsCMSISNNDepthwise(conv2d_attrs, input_shape, filter_shape)) {
// Refer to TVM frontend to know how depth multiplier and out_channels are related
// https://github.com/apache/tvm/blob/6ed3ab3e33f8eafa4acaf53b7a671831de7587e9/python/tvm/relay/frontend/tflite.py#L2129
int kernel_pos_i = kernel_layout.find("I");
depth_multiplier = qnn::get_const_int(filter_shape[kernel_pos_i]);
int kernel_pos_o = kernel_layout.find("O");
int kernel_pos_dm = input_c == 1 ? kernel_pos_o : kernel_pos_i;
depth_multiplier = qnn::get_const_int(filter_shape[kernel_pos_dm]);
}
scalar_args.push_back(ToArg(depth_multiplier));

Expand Down
144 changes: 140 additions & 4 deletions tests/python/contrib/test_cmsisnn/test_conv2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,13 @@
from tvm import relay
from tvm.relay.op.contrib import cmsisnn

from tvm.testing.aot import generate_ref_data, AOTTestModel, compile_models, compile_and_run

from tvm.testing.aot import (
generate_ref_data,
AOTTestModel,
compile_models,
compile_and_run,
run_and_check,
)
from tvm.micro.testing.aot_test_utils import AOT_USMP_CORSTONE300_RUNNER
from .utils import (
make_module,
Expand Down Expand Up @@ -84,13 +89,14 @@ def make_model(
)
)
weight_const = relay.const(weight, kernel_dtype)
conv2d_kernel_sc = kernel_scale[0] if out_channels == 1 else kernel_scale
conv = relay.qnn.op.conv2d(
invar,
weight_const,
input_zero_point=relay.const(input_zero_point, "int32"),
kernel_zero_point=relay.const(kernel_zero_point, "int32"),
input_scale=relay.const(input_scale, "float32"),
kernel_scale=relay.const(kernel_scale, "float32"),
kernel_scale=relay.const(conv2d_kernel_sc, "float32"),
kernel_size=(kernel_h, kernel_w),
data_layout="NHWC",
kernel_layout=weight_format,
Expand All @@ -105,6 +111,7 @@ def make_model(
bias_const = relay.const(bias, "int32")
last_op = relay.nn.bias_add(conv, bias_const, axis=3) if enable_bias else conv
requant_input_sc = [sc * input_scale for sc in kernel_scale]
requant_input_sc = requant_input_sc[0] if out_channels == 1 else requant_input_sc
last_op = relay.qnn.op.requantize(
last_op,
relay.const(requant_input_sc, "float32"),
Expand Down Expand Up @@ -209,7 +216,7 @@ def test_conv2d_number_primfunc_args(
cmsisnn_func = cmsisnn_tir_mod["tvmgen_default_cmsis_nn_main_0"]
assert (
len(cmsisnn_func.params) == expected_num_params
), "Generated unexpected number of function arguments"
), "Generated unexpected number of function arguments."


@tvm.testing.requires_cmsisnn
Expand Down Expand Up @@ -540,6 +547,135 @@ def test_depthwise_int8(
)


@tvm.testing.requires_cmsisnn
@pytest.mark.parametrize("padding", ["SAME", "VALID"])
@pytest.mark.parametrize("strides, dilation", [((1, 1), (1, 1))])
@pytest.mark.parametrize("relu_type", ["RELU", "NONE"])
@pytest.mark.parametrize("depth_multiplier", [1, 3])
@pytest.mark.parametrize(
"input_zero_point, input_scale, kernel_scale",
[
(
10,
0.0128,
[0.11, 0.22],
),
(
-64,
1,
[1, 0.0256, 1.37],
),
],
)
def test_relay_conv2d_cmsisnn_depthwise_int8(
padding,
strides,
dilation,
relu_type,
input_zero_point,
input_scale,
kernel_scale,
depth_multiplier,
):
"""Tests QNN Depthwise int8 op via CMSIS-NN"""
interface_api = "c"
use_unpacked_api = True
test_runner = AOT_USMP_CORSTONE300_RUNNER

dtype = "int8"
in_min, in_max = get_range_for_dtype_str(dtype)

ifm_shape = (1, 24, 24, 1)
groups = ifm_shape[3]
weight_format = "HWIO"
(kernel_h, kernel_w) = (3, 3)
kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier)
out_channels = ifm_shape[3] * depth_multiplier
enable_bias = True
ks_len = len(kernel_scale)
kernel_zero_point = 0
kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)]

output_scale, output_zero_point = get_conv2d_qnn_params(
kernel_shape,
input_scale,
input_zero_point,
kernel_scale,
kernel_zero_point,
dtype,
dtype,
dtype,
True,
)

model, params = make_model(
ifm_shape,
kernel_shape,
input_zero_point,
input_scale,
kernel_zero_point,
kernel_scale,
output_zero_point,
output_scale,
padding,
strides,
dilation,
groups,
dtype,
dtype,
out_channels,
weight_format,
enable_bias,
relu_type,
)
orig_mod = make_module(model)
cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

# validate pattern matching
assert_partitioned_function(orig_mod, cmsisnn_mod)
ashutosh-arm marked this conversation as resolved.
Show resolved Hide resolved

# generate reference output
rng = np.random.default_rng(12345)
inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)}
output_list = generate_ref_data(orig_mod["main"], inputs, params)

# validate presence of depthwise convolution
compiled_models = compile_models(
AOTTestModel(
module=cmsisnn_mod,
inputs=inputs,
outputs=output_list,
params=params,
output_tolerance=1,
),
interface_api,
use_unpacked_api,
pass_config=test_runner.pass_config,
)

cmsisnn_tir_mod = None
for target, mod in compiled_models[0].executor_factory.lowered_ir_mods.items():
if target.kind.name == "cmsis-nn":
cmsisnn_tir_mod = mod

cmsisnn_func = cmsisnn_tir_mod["tvmgen_default_cmsis_nn_main_0"]
call_extern = None
if isinstance(cmsisnn_func.body, tvm.tir.stmt.Evaluate):
call_extern = cmsisnn_func.body.value
else:
call_extern = cmsisnn_func.body.body.value
Mousius marked this conversation as resolved.
Show resolved Hide resolved
assert (
call_extern.args[0].value == "arm_depthwise_conv_wrapper_s8"
), "Relay Conv2D should be mapped to CMSIS-NN Depthwise Convolution."

# validate the output
run_and_check(
models=compiled_models,
runner=test_runner,
interface_api=interface_api,
)


def parameterize_for_invalid_model(test):
"""Generates non int8 inputs"""
in_dtype = ["uint8", "int8"]
Expand Down