ggerganov · zhouwg · Apr 15, 2024 · Apr 22, 2024
diff --git a/ggml.c b/ggml.c
@@ -2195,6 +2195,7 @@ struct ggml_context {
     bool   mem_buffer_owned;
     bool   no_alloc;
     bool   no_alloc_save; // this is used to save the no_alloc state when using scratch buffers
+    bool   use_hwaccel; // copied from ggml_init_params.use_hwaccel; when true, ggml_new_tensor_impl sets backend = GGML_BACKEND_TYPE_GPU on each new tensor
 
     int    n_objects;
 
@@ -2754,6 +2755,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         /*.mem_buffer_owned   =*/ params.mem_buffer ? false : true,
         /*.no_alloc           =*/ params.no_alloc,
         /*.no_alloc_save      =*/ params.no_alloc,
+        /*.use_hwaccel        =*/ params.use_hwaccel, // positional initializer: must stay between no_alloc_save and n_objects to match struct ggml_context
         /*.n_objects          =*/ 0,
         /*.objects_begin      =*/ NULL,
         /*.objects_end        =*/ NULL,
@@ -2985,9 +2987,15 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
         /*.name         =*/ { 0 },
         /*.extra        =*/ NULL,
+        /*.rank         =*/ n_dims,
         /*.padding      =*/ { 0 },
     };
 
+    // a context created with use_hwaccel tags every tensor it allocates for the GPU backend
+    if (ctx->use_hwaccel) {
+        result->backend = GGML_BACKEND_TYPE_GPU;
+    }
+
     // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
     //ggml_assert_aligned(result->data);
 

diff --git a/ggml.h b/ggml.h
@@ -591,7 +591,9 @@ extern "C" {
 
         void * extra; // extra things e.g. for ggml-cuda.cu
 
-        char padding[8];
+        int32_t rank; // initialized from n_dims in ggml_new_tensor_impl
+
+        char padding[20]; // NOTE(review): grown from 8 to 20 alongside the 4-byte rank field, presumably to keep sizeof(struct ggml_tensor) on its required alignment - confirm
     };
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -657,6 +659,7 @@ extern "C" {
         size_t mem_size;   // bytes
         void * mem_buffer; // if NULL, memory will be allocated internally
         bool   no_alloc;   // don't allocate memory for the tensor data
+        bool   use_hwaccel; // mark tensors created in this context as GGML_BACKEND_TYPE_GPU
     };
 
 

diff --git a/whisper.cpp b/whisper.cpp
@@ -6518,6 +6518,9 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
                 /*.no_alloc   =*/ false,
             };
 
+#ifdef GGML_USE_QNN
+            gparams.use_hwaccel   = true; // QNN builds only: have ggml tag the benchmark tensors created from ctx0 for the GPU backend
+#endif // GGML_USE_QNN
             struct ggml_context * ctx0 = ggml_init(gparams);
 
             struct ggml_tensor * a = ggml_new_tensor_2d(ctx0, wtype,         N, N);