mudler · mudler · Aug 30, 2024 · Aug 30, 2024
diff --git a/Makefile b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=9fe94ccac92693d4ae1bc283ff0574e8b3f4e765
+CPPLLAMA_VERSION?=0ab30f8d82fc7156b750c194d64a887e80cbfb82
 
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp

diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
@@ -1119,7 +1119,7 @@ struct llama_server_context
                 continue;
             }
 
-            if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
+            if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
                 LOG_TEE("Error processing the given image");
                 return false;
             }
@@ -2210,7 +2210,7 @@ static void params_parse(const backend::ModelOptions* request,
     params.model_alias =  request->modelfile();
     params.n_ctx = request->contextsize();
     //params.memory_f16 = request->f16memory();
-    params.n_threads = request->threads();
+    params.cpuparams.n_threads = request->threads();
     params.n_gpu_layers = request->ngpulayers();
     params.n_batch = request->nbatch();
     // Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1