Wasm gemm interface update #61
Merged · 3 commits · Nov 3, 2021
src/tensors/cpu/intgemm_interface.h (17 changes: 11 additions & 6 deletions)
@@ -306,10 +306,12 @@ class PrepareBiasForBNodeOp : public NaryNodeOp {
auto quant_mult_a = this->child(2)->val();
auto quant_mult_b = this->child(3)->val();

- float unquant_mult = (-1)*((127.0f / *quant_mult_a->data())*(127.0f / *quant_mult_b->data()))/(127.0f); //Minus one to invert add_ps later on
#if defined(WASM)
- int8PrepareBias((const int8_t *)b->data(), unquant_mult, 0.0, rows(b), cols(b), bias->data(), val_->data());
+ float scale_a = *quant_mult_a->data();
+ float scale_b = *quant_mult_b->data();
+ int8PrepareBias((const int8_t *)b->data(), scale_a, 0.0 /*zero_point_a*/, scale_b, 0.0 /*zero_point_b*/, rows(b), cols(b), bias->data(), val_->data());
#else
+ float unquant_mult = (-1)*((127.0f / *quant_mult_a->data())*(127.0f / *quant_mult_b->data()))/(127.0f); //Minus one to invert add_ps later on
intgemm::Int8Shift::PrepareBias((const int8_t *)b->data(), rows(b), cols(b), intgemm::callbacks::UnquantizeAndAddBiasAndWrite(unquant_mult, bias->data(), val_->data()));
#endif
}
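
The unquantization factor itself does not change here, only where it is computed: the old caller-side expression and the factor the WASM backend now derives from the raw scales (see the fallback change further down in this diff) both reduce to -127 / (quant_mult_a * quant_mult_b). A minimal sketch of that identity, with made-up scale values:

// Sketch only: (-1)*((127/qa)*(127/qb))/127 == -127/(qa*qb).
// The scale values are made up; the real quantization multipliers come from
// the quant_mult nodes in this file.
#include <cassert>
#include <cmath>

int main() {
  const float quant_mult_a = 51.2f;   // hypothetical quantization multiplier of A
  const float quant_mult_b = 102.4f;  // hypothetical quantization multiplier of B

  const float unquant_mult = (-1) * ((127.0f / quant_mult_a) * (127.0f / quant_mult_b)) / 127.0f;
  const float simplified = -127.0f / (quant_mult_a * quant_mult_b);

  assert(std::fabs(unquant_mult - simplified) < 1e-6f);
  return 0;
}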
@@ -341,10 +343,12 @@ class PrepareFakeBiasForBNodeOp : public NaryNodeOp {
auto quant_mult_a = this->child(1)->val();
auto quant_mult_b = this->child(2)->val();

- float unquant_mult = (-1)*((127.0f / *quant_mult_a->data())*(127.0f / *quant_mult_b->data()))/(127.0f); //Minus one to invert add_ps later on
#if defined(WASM)
- int8PrepareBias((const int8_t *)b->data(), unquant_mult, 0.0, rows(b), cols(b), nullptr/*input_bias*/, val_->data());
+ float scale_a = *quant_mult_a->data();
+ float scale_b = *quant_mult_b->data();
+ int8PrepareBias((const int8_t *)b->data(), scale_a, 0.0 /*zero_point_a*/, scale_b, 0.0 /*zero_point_b*/, rows(b), cols(b), nullptr/*input_bias*/, val_->data());
#else
+ float unquant_mult = (-1)*((127.0f / *quant_mult_a->data())*(127.0f / *quant_mult_b->data()))/(127.0f); //Minus one to invert add_ps later on
intgemm::Int8Shift::PrepareBias((const int8_t *)b->data(), rows(b), cols(b), intgemm::callbacks::UnquantizeAndWrite(unquant_mult, val_->data()));
#endif
}};
@@ -457,12 +461,13 @@ class AffineNodeOp : public NaryNodeOp {
ABORT_IF(!shifted_, "Int8::Multiply is not implemented for wasm.");

int8MultiplyAndAddBias(reinterpret_cast<int8_t *>(child(0)->val()->data()), /*A*/
- unquant_mult, /*Scale of A*/
+ aQuantMult, /*Scale of A*/
0, /*zero point of A*/
reinterpret_cast<int8_t *>(child(1)->val()->data()), /*B*/
- 1, /*Scale of B*/
+ bQuantMult, /*Scale of B*/
0, /*zero point of B*/
child(2)->val()->data(), /*child(2) is bias*/
+ scalar_,
rows(child(0)->val()),
cols(child(0)->val()),
cols(child(1)->val()),
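
The shape of this change: instead of a single pre-computed unquant_mult (with 1 passed as the scale of B), the call site now forwards aQuantMult, bQuantMult and scalar_ separately and leaves the combination to the GEMM backend, which, per the fallback later in this diff, computes unquant_multiplier / (scale_A * scale_B). A small sketch of that combination with made-up numbers:

// Sketch only (made-up values): the factor the backend derives from the three
// values the call site now passes, per int8MultiplyAndAddBiasFallback below.
#include <cstdio>

int main() {
  const float aQuantMult = 51.2f;  // hypothetical scale of A
  const float bQuantMult = 25.6f;  // hypothetical scale of B
  const float scalar = 1.0f;       // corresponds to scalar_ at the call site

  const float unquant_factor = scalar / (aQuantMult * bQuantMult);
  std::printf("unquant_factor = %g\n", unquant_factor);  // ~7.63e-4 for these numbers
  return 0;
}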
src/tensors/cpu/wasm_intgemm_fallback.cpp (25 changes: 15 additions & 10 deletions)
@@ -59,18 +59,21 @@ extern "C" void int8PrepareBFromQuantizedTransposedFallback(const int8_t* input_
}

extern "C" void int8PrepareBiasFallback(const int8_t* input_B_prepared,
- float scale,
- float zero_point,
+ float scale_A,
+ float zero_point_A,
+ float scale_B,
+ float zero_point_B,
Index width,
Index cols_B,
const float* input_bias,
float* output) {
LOG(info, "Calling fallback implementation of \"int8PrepareBias\"");
+ float unquant_factor = (-1) * ((127.0f / scale_A) * (127.0f / scale_B)) / (127.0f);
intgemm::Int8Shift::PrepareBias(
input_B_prepared,
width,
cols_B,
- intgemm::callbacks::UnquantizeAndAddBiasAndWrite(scale, input_bias, output));
+ intgemm::callbacks::UnquantizeAndAddBiasAndWrite(unquant_factor, input_bias, output));
}

extern "C" void int8MultiplyAndAddBiasFallback(const int8_t* input_A_prepared,
@@ -80,18 +83,20 @@ extern "C" void int8MultiplyAndAddBiasFallback(const int8_t* input_A_prepared,
float scale_B,
float zero_point_B,
const float* input_bias_prepared,
+ float unquant_multiplier,
Index rows_A,
Index width,
Index cols_B,
float* output) {
LOG(info, "Calling fallback implementation of \"int8MultiplyAndAddBias\"");
- intgemm::Int8Shift::Multiply(
- input_A_prepared,
- input_B_prepared,
- rows_A,
- width,
- cols_B,
- intgemm::callbacks::UnquantizeAndAddBiasAndWrite(scale_A, input_bias_prepared, output));
+ float unquant_factor = unquant_multiplier / (scale_A * scale_B);
+ intgemm::Int8Shift::Multiply(input_A_prepared,
+ input_B_prepared,
+ rows_A,
+ width,
+ cols_B,
+ intgemm::callbacks::UnquantizeAndAddBiasAndWrite(
+ unquant_factor, input_bias_prepared, output));
}

extern "C" void int8SelectColumnsOfBFallback(const int8_t* input_B_prepared,
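
Conceptually, the UnquantizeAndAddBiasAndWrite callback used above scales each int32 dot product by unquant_factor and adds the per-column prepared bias before writing the float output. A scalar sketch of that step (not intgemm's actual SIMD code):

// Scalar sketch of the unquantize-and-add-bias step; for illustration only.
#include <cstddef>
#include <cstdint>
#include <vector>

void unquantizeAndAddBiasReference(const std::vector<int32_t>& dot_products,  // rows_A * cols_B int32 GEMM results
                                   const std::vector<float>& bias,            // cols_B entries (prepared bias)
                                   float unquant_factor,
                                   std::size_t rows_A,
                                   std::size_t cols_B,
                                   std::vector<float>& output) {
  output.resize(rows_A * cols_B);
  for (std::size_t r = 0; r < rows_A; ++r) {
    for (std::size_t c = 0; c < cols_B; ++c) {
      // Scale the integer accumulator back to float and add the bias column.
      output[r * cols_B + c] = unquant_factor * dot_products[r * cols_B + c] + bias[c];
    }
  }
}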
src/tensors/cpu/wasm_intgemm_interface.h (16 changes: 12 additions & 4 deletions)
@@ -169,8 +169,11 @@ int8PrepareA(const float* input_A,
*
* @param[in] input_B_prepared An array representing the prepared B matrix.
* Size of the array = `width` * `cols_B`.
- * @param[in] scale The scaling factor (for quantization)
- * @param[in] zero_point The zero point (for quantization)
+ * @param[in] scale_A The scaling factor (for quantization) of A
+ * @param[in] zero_point_A The zero point (for quantization) of A
+ * @param[in] scale_B The scaling factor (for quantization) of B
+ * @param[in] zero_point_B The zero point (for quantization) of B
+ * factor that is prepared from `scale_A` and `scale_B`.
* @param[in] width No. of rows of Input matrix B (unquantized & non-transposed).
* It should be a multiple of 64.
* @param[in] cols_B No. of columns of Input matrix B (unquantized & non-transposed)
@@ -181,8 +184,10 @@
*/
extern "C" void __attribute__((import_module("wasm_gemm"), import_name("int8_prepare_bias")))
int8PrepareBias(const int8_t* input_B_prepared,
- float scale,
- float zero_point,
+ float scale_A,
+ float zero_point_A,
+ float scale_B,
+ float zero_point_B,
Index width,
Index cols_B,
const float* input_bias,
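
For illustration, a hypothetical call site for the updated signature; the shapes, scales and buffers are made up, and the prepared B matrix and bias are assumed to come from the other interface functions:

// Sketch only: exercises the new int8PrepareBias parameter order.
#include <cstdint>
#include "wasm_intgemm_interface.h"  // adjust the include path as needed

void prepareBiasExample(const int8_t* input_B_prepared,  // from int8PrepareB*
                        const float* input_bias,         // cols_B entries
                        float* output) {                 // cols_B entries
  const Index width = 64;       // rows of unquantized B; must be a multiple of 64
  const Index cols_B = 8;       // columns of unquantized B
  const float scale_A = 11.3f;  // made-up quantization scale of A
  const float scale_B = 7.9f;   // made-up quantization scale of B

  int8PrepareBias(input_B_prepared,
                  scale_A, 0.0f /*zero_point_A*/,
                  scale_B, 0.0f /*zero_point_B*/,
                  width, cols_B,
                  input_bias, output);
}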
@@ -212,6 +217,8 @@ int8PrepareBias(const int8_t* input_B_prepared,
* @param[in] input_bias_prepared An array representing the prepared bias.
* This must be obtained by using `int8PrepareBias` function.
* Size of the array = `cols_B`
+ * @param[in] unquant_multiplier A value that will be multiplied to the final unquantization
+ * factor that is prepared from `scale_A` and `scale_B`.
* @param[in] rows_A No. of rows of Input matrix A. No restriction on its size.
* @param[in] width No. of columns of Input matrix A (same as no. of columns of
* Input matrix B). It should be a multiple of 64.
@@ -228,6 +235,7 @@ extern "C" void
float scale_B,
float zero_point_B,
const float* input_bias_prepared,
+ float unquant_multiplier,
Index rows_A,
Index width,
Index cols_B,
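
Finally, a matching hypothetical call to int8MultiplyAndAddBias with the new unquant_multiplier argument; again all shapes and scales are made up, and the prepared inputs are assumed to come from int8PrepareA, int8PrepareB and int8PrepareBias:

// Sketch only: exercises the extended int8MultiplyAndAddBias signature.
#include <cstdint>
#include "wasm_intgemm_interface.h"  // adjust the include path as needed

void multiplyExample(const int8_t* input_A_prepared,
                     const int8_t* input_B_prepared,
                     const float* input_bias_prepared,
                     float* output) {                   // rows_A * cols_B floats
  const Index rows_A = 16, width = 64, cols_B = 8;      // made-up shapes
  const float scale_A = 11.3f, scale_B = 7.9f;          // made-up scales
  const float unquant_multiplier = 1.0f;                // e.g. Marian's scalar_

  int8MultiplyAndAddBias(input_A_prepared,
                         scale_A, 0.0f /*zero_point_A*/,
                         input_B_prepared,
                         scale_B, 0.0f /*zero_point_B*/,
                         input_bias_prepared,
                         unquant_multiplier,
                         rows_A, width, cols_B,
                         output);
}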