Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MKLDNN]Add quantized concat #13297

Merged
merged 12 commits into from
Nov 28, 2018
1 change: 0 additions & 1 deletion example/ssd/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ def save_params(fname, arg_params, aux_params, logger=None):
mean_args = {'mean_r': rgb_mean[0], 'mean_g': rgb_mean[1], 'mean_b': rgb_mean[2]}

if calib_mode == 'none':
logger.info('Quantizing FP32 model %s' % args.model)
qsym, qarg_params, aux_params = quantize_model(sym=sym, arg_params=arg_params, aux_params=aux_params,
ctx=ctx, excluded_sym_names=excluded_sym_names,
calib_mode=calib_mode, quantized_dtype=args.quantized_dtype,
Expand Down
108 changes: 108 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_concat-inl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file mkldnn_concat-inl.h
* \brief MKLDNN concat forward primitive wrapper and its per-thread cache lookup
* \author Wenting Jiang
*/
#ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_CONCAT_INL_H_
#define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_CONCAT_INL_H_


#if MXNET_USE_MKLDNN == 1
#include <vector>
#include <utility>
#include "../concat-inl.h"
#include "./mkldnn_ops-inl.h"
#include "./mkldnn_base-inl.h"

namespace mxnet {
namespace op {

class MKLDNNConcatFwd {
std::shared_ptr<mkldnn::concat> fwd;
ZhennanQin marked this conversation as resolved.
Show resolved Hide resolved
std::vector<std::shared_ptr<mkldnn::memory>> data;
std::vector<mkldnn::primitive::at> data_mem;
std::shared_ptr<mkldnn::memory> out;

public:
mkldnn::concat::primitive_desc fwd_pd;

MKLDNNConcatFwd(
int concat_dim,
const std::vector<mkldnn::memory::primitive_desc> &data_md): fwd_pd(concat_dim, data_md) {
data.resize(data_md.size());
ZhennanQin marked this conversation as resolved.
Show resolved Hide resolved
}

void SetNewMem(const std::vector<const mkldnn::memory *> &in_data,
ZhennanQin marked this conversation as resolved.
Show resolved Hide resolved
const mkldnn::memory &output) {
CHECK_EQ(in_data.size(), data.size());
for (size_t i = 0; i < data.size(); i++) {
if (this->data[i] == nullptr) {
this->data[i] = std::shared_ptr<mkldnn::memory>(new mkldnn::memory(
in_data[i]->get_primitive_desc(), in_data[i]->get_data_handle()));
this->data_mem.push_back(*this->data[i]);
} else {
this->data[i]->set_data_handle(in_data[i]->get_data_handle());
}
}
if (this->out == nullptr)
this->out = std::shared_ptr<mkldnn::memory>(new mkldnn::memory(
fwd_pd.dst_primitive_desc(), output.get_data_handle()));
else
this->out->set_data_handle(output.get_data_handle());

if (this->fwd == nullptr)
fwd.reset(new mkldnn::concat(fwd_pd, data_mem, *out));
}

const mkldnn::concat &GetFwd() const {
return *fwd;
}
};

/*!
 * \brief Return the concat forward object for the given signature, creating
 *        and storing it in a thread-local map on first use.
 * \param concat_dim axis along which the inputs are concatenated
 * \param in_data    operator inputs; together with concat_dim they form the lookup key
 * \param data_md    input primitive descriptors, used only when a new entry is created
 * \return reference to the entry stored in the thread-local map
 */
static MKLDNNConcatFwd &GetConcatForward(
    int concat_dim, const std::vector<NDArray> &in_data,
    const std::vector<mkldnn::memory::primitive_desc> &data_md) {
#if DMLC_CXX11_THREAD_LOCAL
  static thread_local std::unordered_map<OpSignature, MKLDNNConcatFwd, OpHash> fwds;
#else
  static MX_THREAD_LOCAL std::unordered_map<OpSignature, MKLDNNConcatFwd, OpHash> fwds;
#endif
  OpSignature key;
  key.AddSign(concat_dim);
  key.AddSign(in_data);

  auto it = fwds.find(key);
  if (it == fwds.end()) {
    // Build the entry in place instead of copying the key and the forward
    // object through an intermediate std::pair.
    auto ins_ret = fwds.emplace(key, MKLDNNConcatFwd(concat_dim, data_md));
    CHECK(ins_ret.second);
    it = ins_ret.first;
  }
  return it->second;
}

} // namespace op
} // namespace mxnet

#endif // MXNET_USE_MKLDNN == 1
#endif // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_CONCAT_INL_H_
70 changes: 2 additions & 68 deletions src/operator/nn/mkldnn/mkldnn_concat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,79 +22,13 @@
* \brief
* \author Wenting Jiang
*/
#include "../concat-inl.h"
#include "./mkldnn_ops-inl.h"
#include "./mkldnn_base-inl.h"

#if MXNET_USE_MKLDNN == 1
#include "mkldnn_concat-inl.h"

namespace mxnet {
namespace op {

class MKLDNNConcatFwd {
std::shared_ptr<mkldnn::concat> fwd;
std::vector<std::shared_ptr<mkldnn::memory>> data;
std::vector<mkldnn::primitive::at> data_mem;
std::shared_ptr<mkldnn::memory> out;

public:
mkldnn::concat::primitive_desc fwd_pd;

MKLDNNConcatFwd(
int concat_dim,
const std::vector<mkldnn::memory::primitive_desc> &data_md): fwd_pd(concat_dim, data_md) {
data.resize(data_md.size());
}

void SetNewMem(const std::vector<const mkldnn::memory *> &in_data,
const mkldnn::memory &output) {
CHECK_EQ(in_data.size(), data.size());
for (size_t i = 0; i < data.size(); i++) {
if (this->data[i] == nullptr) {
this->data[i] = std::shared_ptr<mkldnn::memory>(new mkldnn::memory(
in_data[i]->get_primitive_desc(), in_data[i]->get_data_handle()));
this->data_mem.push_back(*this->data[i]);
} else {
this->data[i]->set_data_handle(in_data[i]->get_data_handle());
}
}
if (this->out == nullptr)
this->out = std::shared_ptr<mkldnn::memory>(new mkldnn::memory(
fwd_pd.dst_primitive_desc(), output.get_data_handle()));
else
this->out->set_data_handle(output.get_data_handle());

if (this->fwd == nullptr)
fwd.reset(new mkldnn::concat(fwd_pd, data_mem, *out));
}

const mkldnn::concat &GetFwd() const {
return *fwd;
}
};

/*!
 * \brief Fetch the concat forward object keyed by (concat_dim, in_data) from
 *        a thread-local map, inserting a new one built from data_md if absent.
 */
static MKLDNNConcatFwd &GetConcatForward(
    int concat_dim, const std::vector<NDArray> &in_data,
    const std::vector<mkldnn::memory::primitive_desc> &data_md) {
  using ConcatFwdMap = std::unordered_map<OpSignature, MKLDNNConcatFwd, OpHash>;
#if DMLC_CXX11_THREAD_LOCAL
  static thread_local ConcatFwdMap fwds;
#else
  static MX_THREAD_LOCAL ConcatFwdMap fwds;
#endif
  OpSignature key;
  key.AddSign(concat_dim);
  key.AddSign(in_data);

  const auto found = fwds.find(key);
  if (found != fwds.end()) {
    return found->second;
  }

  // First request for this signature: create the object and remember it.
  MKLDNNConcatFwd fwd(concat_dim, data_md);
  const auto ins_ret = fwds.insert(std::pair<OpSignature, MKLDNNConcatFwd>(key, fwd));
  CHECK(ins_ret.second);
  return ins_ret.first->second;
}

void MKLDNNConcatForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
const std::vector<NDArray> &in_data,
const std::vector<OpReqType> &req,
Expand Down
119 changes: 119 additions & 0 deletions src/operator/quantization/mkldnn/mkldnn_quantized_concat.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2018 by Contributors
* \file mkldnn_quantized_concat.cc
* \brief MKLDNN implementation of the quantized concat operator
*/

#if MXNET_USE_MKLDNN == 1
#include "../../nn/mkldnn/mkldnn_concat-inl.h"
#include "../quantization_utils.h"

namespace mxnet {
namespace op {

namespace quantized_concat_enum {
// Indices into the operator's output list: concatenated data, then its min and max.
enum QuantizedConcatOutputs { kOut, kMin, kMax };
}  // namespace quantized_concat_enum

/*!
 * \brief Quantization scale of an array: the integer range selected by the
 *        array's dtype divided by MaxAbs(min, max).
 * \param data array whose dtype selects the int8 or uint8 range
 * \param min  lower bound of the real-valued range
 * \param max  upper bound of the real-valued range
 */
static float GetScale(const NDArray& data, float min, float max) {
  // Every dtype other than int8 uses the uint8 range.
  size_t quantized_range = kUint8Range;
  if (data.dtype() == mshadow::kInt8) {
    quantized_range = kInt8Range;
  }
  const auto real_range = MaxAbs(min, max);
  return quantized_range / real_range;
}

/*!
 * \brief Forward computation of the MKLDNN quantized concat operator.
 *
 * in_data holds num_args data arrays followed by one (min, max) pair per data
 * array; out_data holds the concatenated data, the output min and the output
 * max. The output range is the union of all input ranges and of zero. Inputs
 * whose scale differs from the output scale are rescaled through an MKLDNN
 * reorder before being concatenated.
 *
 * \param attrs    node attributes carrying the parsed ConcatParam
 * \param ctx      operator context (not used here)
 * \param in_data  3 * num_args input arrays
 * \param req      write request; only the entry for the data output is read
 * \param out_data the three output arrays
 */
static void MKLDNNQuantizedConcatForward(const nnvm::NodeAttrs& attrs, const OpContext& ctx,
                                         const std::vector<NDArray>& in_data,
                                         const std::vector<OpReqType>& req,
                                         const std::vector<NDArray>& out_data) {
  const ConcatParam& param_ = nnvm::get<ConcatParam>(attrs.parsed);
  CHECK_EQ(in_data.size(), static_cast<size_t>(param_.num_args * 3));
  CHECK_EQ(out_data.size(), 3U);
  // Collect data and output min/max
  std::vector<float> data_min(param_.num_args);
  std::vector<float> data_max(param_.num_args);
  float output_min = 0.f;
  float output_max = 0.f;
  for (int i = 0; i < param_.num_args; ++i) {
    data_min[i] = in_data[param_.num_args + 2 * i].data().dptr<float>()[0];
    if (data_min[i] < output_min) output_min = data_min[i];
    data_max[i] = in_data[param_.num_args + 2 * i + 1].data().dptr<float>()[0];
    if (data_max[i] > output_max) output_max = data_max[i];
  }
  out_data[quantized_concat_enum::kMin].data().dptr<float>()[0] = output_min;
  out_data[quantized_concat_enum::kMax].data().dptr<float>()[0] = output_max;
  auto out_scale = GetScale(out_data[quantized_concat_enum::kOut], output_min, output_max);
  std::vector<mkldnn::memory::primitive_desc> data_md;
  std::vector<const mkldnn::memory*> data_mem;
  data_md.reserve(param_.num_args);
  data_mem.reserve(param_.num_args);
  // new_data_mem is for auto-free new created mkldnn memory
  std::vector<std::shared_ptr<mkldnn::memory>> new_data_mem;
  for (int i = 0; i < param_.num_args; ++i) {
    auto i_scale = GetScale(in_data[i], data_min[i], data_max[i]);
    auto mem = in_data[i].GetMKLDNNData();
    auto pd = mem->get_primitive_desc();
    if (i_scale == out_scale) {
      // Scales match exactly: the input can be concatenated as it is.
      data_mem.push_back(mem);
    } else {
      // Rescale into a temporary memory with the same layout as the input.
      const auto rescaled_mem = std::make_shared<mkldnn::memory>(pd);
      new_data_mem.push_back(rescaled_mem);
      std::vector<float> reorder_scale = {out_scale / i_scale};
      primitive_attr reorder_attr;
      reorder_attr.set_int_output_round_mode(round_mode::round_nearest);
      reorder_attr.set_output_scales(0, reorder_scale);
      const auto reorder_pd = mkldnn::reorder::primitive_desc(pd, pd, reorder_attr);
      MKLDNNStream::Get()->RegisterPrim(mkldnn::reorder(reorder_pd, *mem, *rescaled_mem));
      data_mem.push_back(rescaled_mem.get());
    }
    data_md.push_back(pd);
  }
  MKLDNNConcatFwd& fwd = GetConcatForward(param_.dim, in_data, data_md);
  // Index the data output and its request through one enum so that the memory
  // is created for and committed to the same array.
  mxnet::mkldnn_output_t out_mem =
      CreateMKLDNNMem(out_data[quantized_concat_enum::kOut], fwd.fwd_pd.dst_primitive_desc(),
                      req[quantized_concat_enum::kOut]);
  fwd.SetNewMem(data_mem, *out_mem.second);
  MKLDNNStream::Get()->RegisterPrim(fwd.GetFwd());
  CommitOutput(out_data[quantized_concat_enum::kOut], out_mem);
  // Submit before returning, while the temporaries in new_data_mem are still alive.
  MKLDNNStream::Get()->Submit();
}

/*!
 * \brief Storage type inference for quantized concat: checks the number of
 *        input and output attributes, then delegates to MKLDNNStorageType.
 */
inline static bool ConcatStorageType(const nnvm::NodeAttrs& attrs, const int dev_mask,
                                     DispatchMode* dispatch_mode, std::vector<int>* in_attrs,
                                     std::vector<int>* out_attrs) {
  const auto& param_ = nnvm::get<ConcatParam>(attrs.parsed);
  // Each data input comes with a min and a max input; there are three outputs.
  CHECK_EQ(in_attrs->size(), static_cast<size_t>(param_.num_args * 3));
  CHECK_EQ(out_attrs->size(), 3U);

  const bool inferred =
      MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);
  return inferred;
}

// Attach the MKLDNN CPU implementation and its storage type inference to the
// _contrib_quantized_concat operator.
NNVM_REGISTER_OP(_contrib_quantized_concat)
.set_attr<FInferStorageType>("FInferStorageType", ConcatStorageType)
.set_attr<FComputeEx>("FComputeEx<cpu>", MKLDNNQuantizedConcatForward)
// The operator requests one temporary workspace resource.
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
.set_attr<bool>("TIsMKLDNN", true);

} // namespace op
} // namespace mxnet

#endif // MXNET_USE_MKLDNN == 1
2 changes: 2 additions & 0 deletions src/operator/quantization/quantization_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
namespace mxnet {
namespace op {

// Range constants for uint8 (255) and int8 (127) quantized data.
static const size_t kUint8Range = 255;
static const size_t kInt8Range = 127;

template<typename T>
MSHADOW_XINLINE int Sign(T val) {
Expand Down
Loading