Skip to content

Commit

Permalink
PoC-S26: offload simple f32 2x2 matrix addition operation to QNN CPU …
Browse files Browse the repository at this point in the history
…backend --- done but not perfect
  • Loading branch information
zhouwg committed Apr 7, 2024
1 parent 1cbb98f commit 4e08bd4
Show file tree
Hide file tree
Showing 2 changed files with 175 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.concurrent.atomic.AtomicBoolean;

import butterknife.BindView;
Expand Down Expand Up @@ -448,6 +450,13 @@ public void run() {
String benchmarkTip = "Bench:" + CDEUtils.getBenchmarkDesc(benchmarkIndex) + " (model: " + strModeName
+ " ,threads: " + nThreadCounts
+ " ) cost " + duration + " milliseconds";
//04-07-2024(April,7,2024), add timestamp
String timestamp = "";
SimpleDateFormat fullDateFormat = new SimpleDateFormat("yyyy-MM-dd,HH:mm:ss");
Date date = new Date(System.currentTimeMillis());
timestamp = fullDateFormat.format(date);
benchmarkTip += ", on " + timestamp;

benchmarkTip += "\n";

//be careful here
Expand Down
183 changes: 166 additions & 17 deletions core/ggml/jni/ggml-qnn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4691,26 +4691,29 @@ int qnn_implementation::run_qnn_matrix() {
"tensor_1"
};

Qnn_Tensor_t tensor_2 = (Qnn_Tensor_t) {
.version= QNN_TENSOR_VERSION_1,
{.v1= {
.id=0,
.name= "tensor_2",
.type= QNN_TENSOR_TYPE_APP_READ,
.dataFormat= QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER,
.dataType= QNN_DATATYPE_FLOAT_32,
.quantizeParams= { QNN_DEFINITION_UNDEFINED,
QNN_QUANTIZATION_ENCODING_UNDEFINED,
{.scaleOffsetEncoding= {.scale= 0.0000000000000000f, .offset= 0}}},
.rank= 2,
.dimensions= dimensions_output,
.memType= QNN_TENSORMEMTYPE_RAW,
{.clientBuf= { .data=nullptr,
.dataSize=0}}}}};

Qnn_Tensor_t outputs[] = {
(Qnn_Tensor_t) {
.version= QNN_TENSOR_VERSION_1,
{.v1= {
.id=0,
.name= "tensor_2",
.type= QNN_TENSOR_TYPE_APP_READ,
.dataFormat= QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER,
.dataType= QNN_DATATYPE_FLOAT_32,
.quantizeParams= { QNN_DEFINITION_UNDEFINED,
QNN_QUANTIZATION_ENCODING_UNDEFINED,
{.scaleOffsetEncoding= {.scale= 0.0000000000000000f, .offset= 0}}},
.rank= 2,
.dimensions= dimensions_output,
.memType= QNN_TENSORMEMTYPE_RAW,
{.clientBuf= { .data=nullptr,
.dataSize=0}}}}}
tensor_2
};

Qnn_Param_t context_params[] = {}; // 04-05-2024(April-5,2024), to make QNN SDK happy

add_tensor("qnn_matrix_addtition", &tensor_0);
add_tensor("qnn_matrix_addtition", &tensor_1);
add_node(QNN_OPCONFIG_VERSION_1, // Op_Config_t Version
Expand Down Expand Up @@ -4796,6 +4799,7 @@ int qnn_matrix(int n_backend_type, int n_op_type) {
uint8_t * qnn_buffer = nullptr;
Qnn_Tensor_t tensor_0 = QNN_TENSOR_INIT;
Qnn_Tensor_t tensor_1 = QNN_TENSOR_INIT;
Qnn_Tensor_t tensor_2 = QNN_TENSOR_INIT;
Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
Qnn_OpConfig_t qnn_opconfig = QNN_OPCONFIG_INIT;

Expand Down Expand Up @@ -4826,7 +4830,152 @@ int qnn_matrix(int n_backend_type, int n_op_type) {
LOGGD("alloc rpcmem successfully\n");
}

qnn_backend.run_qnn_matrix();
if (0) {
qnn_backend.run_qnn_matrix(); //TODO: QNN pipeline works but result of output tensor is incorrect
} else { //the following workaround method works fine as expected
int error = 0;
Qnn_GraphHandle_t graph_handle = nullptr;
error = qnn_raw_interface.graphCreate(qnn_backend.get_qnn_context_handle(), "qnn_matrix_addition", nullptr, &graph_handle);
LOGGI("error = %d\n", error);
uint32_t dimensions_input_0[] = {2, 2};
uint32_t dimensions_input_1[] = {2, 2};
uint32_t dimensions_output[] = {2, 2};

float input_matrix[2][4] = { {1, 1, 1, 1}, {2, 2, 2, 2}};
float output_matrix[1][4] = { {1.0,1.0,1.0,1.0} };

tensor_0 = {
.version= QNN_TENSOR_VERSION_1,
{.v1= {
.id=0,
.name= "tensor_0",
.type= QNN_TENSOR_TYPE_APP_WRITE,
.dataFormat= QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER,
.dataType= QNN_DATATYPE_FLOAT_32,
.quantizeParams= { QNN_DEFINITION_UNDEFINED,
QNN_QUANTIZATION_ENCODING_UNDEFINED,
{.scaleOffsetEncoding= {.scale= 0.0000000000000000f, .offset= 0}}},
.rank= 2,
.dimensions=dimensions_input_0,
.memType= QNN_TENSORMEMTYPE_RAW,
{.clientBuf= { .data=nullptr,
.dataSize=0}}}}
};

tensor_1 = {
.version= QNN_TENSOR_VERSION_1,
{.v1= {
.id=0,
.name= "tensor_1",
.type= QNN_TENSOR_TYPE_APP_WRITE,
.dataFormat= QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER,
.dataType= QNN_DATATYPE_FLOAT_32,
.quantizeParams= { QNN_DEFINITION_UNDEFINED,
QNN_QUANTIZATION_ENCODING_UNDEFINED,
{.scaleOffsetEncoding= {.scale= 0.0000000000000000f, .offset= 0}}},
.rank= 2,
.dimensions=dimensions_input_1,
.memType= QNN_TENSORMEMTYPE_RAW,
{.clientBuf= { .data=nullptr,
.dataSize=0}}}}
};

//here is similar to OpenMAX IL
Qnn_Tensor_t tensor_2 = (Qnn_Tensor_t) {
.version= QNN_TENSOR_VERSION_1,
{.v1= {
.id=0,
.name= "tensor_2",
.type= QNN_TENSOR_TYPE_APP_READ,
.dataFormat= QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER,
.dataType= QNN_DATATYPE_FLOAT_32,
.quantizeParams= { QNN_DEFINITION_UNDEFINED,
QNN_QUANTIZATION_ENCODING_UNDEFINED,
{.scaleOffsetEncoding= {.scale= 0.0000000000000000f, .offset= 0}}},
.rank= 2,
.dimensions= dimensions_output,
.memType= QNN_TENSORMEMTYPE_RAW,
{.clientBuf= { .data=nullptr,
.dataSize=0}}}}};


error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, &tensor_0);
LOGGI("error = %d\n", error);
error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, &tensor_1);
LOGGI("error = %d\n", error);
error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, &tensor_2);
LOGGI("error = %d\n", error);

QNN_VER_PTR(tensor_0)->clientBuf = { input_matrix[0], 16};
QNN_VER_PTR(tensor_1)->clientBuf = { input_matrix[1], 16};
QNN_VER_PTR(tensor_2)->clientBuf = { output_matrix[0], 16};

//for this single compute node in a single compute graph, nullptr is ok
Qnn_Param_t params[] = {};
Qnn_Tensor_t tensor_inputs[] = {
tensor_0,
tensor_1
};

Qnn_Tensor_t tensor_outputs[] = {
tensor_2
};

Qnn_OpConfig_t opconfig = {
(Qnn_OpConfigVersion_t) 1, .v1 = {
"qnn_matrix_addition",
QNN_OP_PACKAGE_NAME_QTI_AISW,
QNN_OP_ELEMENT_WISE_ADD,//https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/MasterOpDef.html#elementwiseadd
0,
params,
2,
tensor_inputs,
1,
tensor_outputs
}
};
error = qnn_raw_interface.graphAddNode(graph_handle, opconfig);
LOGGI("error = %d\n", error);
error = qnn_raw_interface.graphFinalize(graph_handle, nullptr, nullptr);
LOGGI("error = %d\n", error);
//TODO: hardcoded in PoC stage, shape of input matrix is 2x2
for (size_t i = 0; i < 2; i++) {
if (0 == i) {
LOGGD("input matrix A:\n");
GGML_JNI_NOTIFY("input matrix A:");
} else if (1 == i) {
LOGGD("input matrix B:\n");
GGML_JNI_NOTIFY("input matrix B:");
}
float *temp = input_matrix[i];
LOGGD("%.2f \t %.2f\n", temp[0], temp[1]);
LOGGD("%.2f \t %.2f\n", temp[2], temp[3]);
GGML_JNI_NOTIFY("%.2f \t %.2f\n", temp[0], temp[1]);
GGML_JNI_NOTIFY("%.2f \t %.2f\n", temp[2], temp[3]);
}
error = qnn_raw_interface.graphExecute(graph_handle, tensor_inputs, 2, tensor_outputs, 1, nullptr, nullptr);
LOGGI("error = %d\n", error);
if (0 == error) {
float * temp = (float*)((QNN_VER_PTR(tensor_2)->clientBuf.data));
LOGGD("output tensor:%.2f %.2f %.2f %.2f\n", temp[0], temp[1], temp[2], temp[3]);

if (0 == result) { // worked as expected for the first time on 04-07(April,7),17:00, 2024
//TODO: hardcoded in PoC stage, shape of output matrix is 2x2
LOGGD("output matrix:\n");
GGML_JNI_NOTIFY("output matrix:");
for (size_t i = 0; i < 1; i++) {
float * temp = output_matrix[i];
LOGGD("%.2f \t %.2f\n", temp[0], temp[1]);
LOGGD("%.2f \t %.2f\n", temp[2], temp[3]);
GGML_JNI_NOTIFY("%.2f \t %.2f\n", temp[0], temp[1]);
GGML_JNI_NOTIFY("%.2f \t %.2f\n", temp[2], temp[3]);
}
LOGGD("\n");
}

}
}


failure:
qnn_backend.unregister_rpcmem();
Expand Down

0 comments on commit 4e08bd4

Please sign in to comment.