Scs trans match v2.9 (#7368)
* [xpu] support qkv-fused weight reuse in scs_tran_match (#7293)

* [xpu] fix qkv-fused bias check

* [Cherry-pick][X86][ARM] scale support int, int64 (#6590)
newway authored Oct 22, 2021
1 parent 6c5af8a commit 92eb5d7
Showing 3 changed files with 71 additions and 69 deletions.
24 changes: 22 additions & 2 deletions lite/kernels/x86/scale_compute.cc
@@ -20,6 +20,26 @@ REGISTER_LITE_KERNEL(scale,
                      kNCHW,
                      paddle::lite::kernels::x86::ScaleCompute<float>,
                      def)
-    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
     .Finalize();
+
+REGISTER_LITE_KERNEL(scale,
+                     kX86,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::x86::ScaleCompute<int>,
+                     int32)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt32))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt32))})
+    .Finalize();
+
+REGISTER_LITE_KERNEL(scale,
+                     kX86,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::x86::ScaleCompute<int64_t>,
+                     int64)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
+    .Finalize();
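With these registrations, scale runs natively on int32 and int64 tensors on x86: the kernel stays under the kFloat kernel precision, but the int32/int64 aliases bind kInt32/kInt64 tensor types. For reference, a minimal standalone sketch of the elementwise math each variant performs (scale_ref is a hypothetical helper mirroring scale_compute<T> below, not the Paddle Lite API):

```cpp
#include <cstdint>
#include <vector>

// Sketch of the elementwise scale computation for an integral type:
// out[i] = x[i] * scale + bias; when bias is applied before scaling,
// it is folded into the bias term up front (bias *= scale).
template <typename T>
std::vector<T> scale_ref(const std::vector<T>& x, T scale, T bias,
                         bool bias_before) {
  if (bias_before) bias *= scale;
  std::vector<T> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) out[i] = x[i] * scale + bias;
  return out;
}

int main() {
  // With scale = 2 and bias = 1 (bias after scale): {1, 2, 3} -> {3, 5, 7}.
  auto out = scale_ref<int64_t>({1, 2, 3}, 2, 1, /*bias_before=*/false);
  return out == std::vector<int64_t>{3, 5, 7} ? 0 : 1;
}
```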
8 changes: 5 additions & 3 deletions lite/kernels/x86/scale_compute.h
@@ -27,7 +27,7 @@ namespace x86 {
 
 template <typename T>
 void scale_compute(
-    const T* x, T* out, int size, float scale, float bias, bool bias_before) {
+    const T* x, T* out, int size, T scale, T bias, bool bias_before) {
   if (bias_before) bias *= scale;
   for (int i = 0; i < size; i++) {
     out[i] = x[i] * scale + bias;
@@ -41,11 +41,13 @@ class ScaleCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 
   void Run() override {
     auto& param = *param_.get_mutable<param_t>();
+    T scale = static_cast<T>(param.scale);
+    T bias = static_cast<T>(param.bias);
     scale_compute(param.x->template data<T>(),
                   param.output->template mutable_data<T>(),
                   param.x->dims().production(),
-                  param.scale,
-                  param.bias,
+                  scale,
+                  bias,
                   !param.bias_after_scale);
   }
 
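Note the casts in Run(): param.scale and param.bias remain floats in the op parameter, so for the integer kernels any fractional part is dropped, since float-to-integer conversion in C++ truncates toward zero. A minimal sketch of that effect:

```cpp
#include <cassert>

int main() {
  // The op parameter carries float scale/bias; the integer kernels cast them
  // to T first, so fractional values truncate toward zero.
  float scale = 2.7f, bias = -1.5f;
  assert(static_cast<int>(scale) == 2);   // not rounded to 3
  assert(static_cast<int>(bias) == -1);   // truncation toward zero
  return 0;
}
```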
108 changes: 44 additions & 64 deletions lite/tests/kernels/scale_compute_test.cc
@@ -21,6 +21,7 @@
 namespace paddle {
 namespace lite {
 
+template <typename T>
 class ScaleComputeTester : public arena::TestCase {
  protected:
   // common attributes for this op.
@@ -31,7 +32,6 @@ class ScaleComputeTester : public arena::TestCase {
   float bias_ = 0.;
   bool bias_after_scale_ = true;
   bool have_relu6 = false;
-  PrecisionType x_dtype_ = PRECISION(kFloat);
   std::string act_type_ = "relu6";
   float alpha_ = 6.0f;
 
@@ -42,18 +42,15 @@
                      float scale,
                      float bias,
                      bool bias_after_scale = true,
-                     bool have_relu6 = false,
-                     PrecisionType x_dtype = PRECISION(kFloat))
+                     bool have_relu6 = false)
       : TestCase(place, alias),
         x_dims_(x_dims),
         scale_(scale),
         bias_(bias),
         bias_after_scale_(bias_after_scale),
-        have_relu6(have_relu6),
-        x_dtype_(x_dtype) {}
+        have_relu6(have_relu6) {}
 
-  template <typename T>
-  void RunBaselineHelper(Scope* scope) {
+  void RunBaseline(Scope* scope) override {
     auto* x = scope->FindTensor(x_);
     auto* x_data = x->template data<T>();
     auto* out = scope->NewTensor(out_);
@@ -77,20 +74,6 @@
     }
   }
 
-  void RunBaseline(Scope* scope) override {
-    switch (x_dtype_) {
-      case PRECISION(kFloat):
-        RunBaselineHelper<float>(scope);
-        break;
-      case PRECISION(kInt32):
-        RunBaselineHelper<int>(scope);
-        break;
-      default:
-        LOG(FATAL) << "unsupported data type: " << PrecisionToStr(x_dtype_);
-        break;
-    }
-  }
-
   void PrepareOpDesc(cpp::OpDesc* op_desc) {
     op_desc->SetType("scale");
     op_desc->SetInput("X", {x_});
@@ -104,33 +87,27 @@
     }
   }
 
-  template <typename T>
-  void PrepareDataHelper() {
-    std::vector<T> dx(x_dims_.production());
-    fill_data_rand<T>(dx.data(), -10, 10, x_dims_.production());
-    SetCommonTensor(x_, x_dims_, dx.data());
-  }
-
   void PrepareData() override {
-    switch (x_dtype_) {
-      case PRECISION(kFloat):
-        PrepareDataHelper<float>();
-        break;
-      case PRECISION(kInt32):
-        PrepareDataHelper<int>();
-        break;
-      default:
-        LOG(FATAL) << "unsupported data type: " << PrecisionToStr(x_dtype_);
-        break;
-    }
+    std::vector<T> dx(x_dims_.production());
+    fill_data_rand<T>(dx.data(),
+                      static_cast<T>(-10),
+                      static_cast<T>(10),
+                      x_dims_.production());
+    SetCommonTensor(x_, x_dims_, dx.data());
   }
 };
 
 void TestScaleShape(Place place, float abs_error) {
   for (auto x_dims :
        std::vector<std::vector<int64_t>>{{5, 2, 3, 4}, {8, 3, 5}, {12, 3}}) {
-    std::unique_ptr<arena::TestCase> tester(
-        new ScaleComputeTester(place, "def", DDim(x_dims), 1.5f, 0.2f, true));
+    std::unique_ptr<arena::TestCase> tester(new ScaleComputeTester<float>(
+        place, "def", DDim(x_dims), 1.5f, 0.2f, true));
     arena::Arena arena(std::move(tester), place, abs_error);
     arena.TestPrecision();
   }
@@ -139,7 +116,7 @@ void TestScaleShape(Place place, float abs_error) {
 void TestScaleValue(Place place, float abs_error) {
   for (float scale : {0.123, 0., -1.2}) {
     for (float bias : {1., 0., -1.2331}) {
-      std::unique_ptr<arena::TestCase> tester(new ScaleComputeTester(
+      std::unique_ptr<arena::TestCase> tester(new ScaleComputeTester<float>(
           place, "def", DDim({5, 2, 3, 4}), scale, bias));
       arena::Arena arena(std::move(tester), place, abs_error);
       arena.TestPrecision();
@@ -149,27 +126,19 @@ void TestScaleValue(Place place, float abs_error) {
 
 void TestScaleOrder(Place place, float abs_error) {
   for (bool bias_after_scale : {true, false}) {
-    std::unique_ptr<arena::TestCase> tester(new ScaleComputeTester(
+    std::unique_ptr<arena::TestCase> tester(new ScaleComputeTester<float>(
         place, "def", DDim({2, 3, 4, 5}), 1.5f, 0.2f, bias_after_scale));
     arena::Arena arena(std::move(tester), place, abs_error);
     arena.TestPrecision();
   }
 }
 
-void TestScaleDtype(Place place, float abs_error) {
-  for (PrecisionType x_dtype : {PRECISION(kFloat), PRECISION(kInt32)}) {
-    if (x_dtype == PRECISION(kFloat)) {
-      place.precision = PRECISION(kFloat);
-    } else if (x_dtype == PRECISION(kInt32)) {
-      place.precision = PRECISION(kInt32);
-    } else {
-      LOG(FATAL) << "fatal";
-    }
-    std::unique_ptr<arena::TestCase> tester(new ScaleComputeTester(
-        place, "def", DDim({2, 3, 4, 5}), 2.f, 1.f, true, false, x_dtype));
-    arena::Arena arena(std::move(tester), place, abs_error);
-    arena.TestPrecision();
-  }
+template <typename T>
+void TestScaleDtype(Place place, float abs_error, std::string alias) {
+  std::unique_ptr<arena::TestCase> tester(new ScaleComputeTester<T>(
+      place, alias, DDim({2, 3, 4, 5}), 2.f, 1.f, true, false));
+  arena::Arena arena(std::move(tester), place, abs_error);
+  arena.TestPrecision();
 }
 
 void TestScaleRelu6(Place place, float abs_error) {
@@ -178,13 +147,13 @@
   for (bool bias_after_scale : {true, false}) {
     for (bool have_relu6 : {true, false}) {
       std::unique_ptr<arena::TestCase> tester(
-          new ScaleComputeTester(place,
-                                 "def",
-                                 DDim(x_dims),
-                                 1.5f,
-                                 0.2f,
-                                 bias_after_scale,
-                                 have_relu6));
+          new ScaleComputeTester<float>(place,
+                                        "def",
+                                        DDim(x_dims),
+                                        1.5f,
+                                        0.2f,
+                                        bias_after_scale,
+                                        have_relu6));
       arena::Arena arena(std::move(tester), place, abs_error);
       arena.TestPrecision();
     }
@@ -203,7 +172,7 @@ TEST(Scale, precision) {
   abs_error = 5e-2;  // Using fp16 in OPENCL
 #elif defined(LITE_WITH_ARM)
   place = TARGET(kARM);
-#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
+#elif defined(LITE_WITH_XPU)
   place = TARGET(kXPU);
   abs_error = 3e-4;  // Some operations use fp16 in XPU
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
@@ -221,10 +190,6 @@
 #if defined(LITE_WITH_OPENCL)
   TestScaleRelu6(place, abs_error);
 #endif
-#if defined(LITE_WITH_ARM) && !defined(LITE_WITH_NPU) && \
-    !defined(LITE_WITH_OPENCL)
-  TestScaleDtype(place, abs_error);
-#endif
 }
 
 TEST(Scale, performance) {
@@ -237,7 +202,7 @@
   return;
 #endif
 
-  std::unique_ptr<arena::TestCase> tester(new ScaleComputeTester(
+  std::unique_ptr<arena::TestCase> tester(new ScaleComputeTester<float>(
       place, "def", DDim(std::vector<int64_t>{5, 2, 3, 4}), 1.2, 1.1, true));
 
   // To modify the arm context, one can retrieve the context as follows.
@@ -249,5 +214,20 @@
   arena.TestPerformance(100);
 }
 
+TEST(Scale, dtype) {
+  Place place;
+  float abs_error = 1e-4;
+#if defined(LITE_WITH_ARM)
+  place = TARGET(kARM);
+#elif defined(LITE_WITH_X86)
+  place = TARGET(kX86);
+#else
+  return;
+#endif
+
+  TestScaleDtype<int>(place, abs_error, "int32");
+  TestScaleDtype<int64_t>(place, abs_error, "int64");
+}
+
 }  // namespace lite
 }  // namespace paddle
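As an aside on the relu6 path exercised by TestScaleRelu6 above: the fused activation clamps the scaled output to [0, alpha] with alpha = 6.0f. A sketch of that baseline math, assuming the standard relu6 definition (scale_relu6 is a hypothetical helper, not the test's exact code):

```cpp
#include <algorithm>

// scale followed by relu6: y = min(max(x * scale + bias, 0), alpha).
float scale_relu6(float x, float scale, float bias, float alpha = 6.0f) {
  return std::min(std::max(x * scale + bias, 0.0f), alpha);
}

int main() {
  // 5 * 1.5 + 0.2 = 7.7, clamped to 6 by relu6.
  return scale_relu6(5.0f, 1.5f, 0.2f) == 6.0f ? 0 : 1;
}
```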
