diff --git a/Makefile b/Makefile
index be4971d621dc..bc54f319b924 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=cce5a9007572c6e9fa522296b77571d2e5071357
+CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445
 
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index d21735daa396..7b75de5bd0cf 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -203,7 +203,7 @@ struct llama_client_slot
     std::string stopping_word;
 
     // sampling
-    struct common_sampler_params sparams;
+    struct common_params_sampling sparams;
     common_sampler *ctx_sampling = nullptr;
 
     int32_t ga_i = 0; // group-attention state
@@ -662,7 +662,7 @@ struct llama_server_context
     bool launch_slot_with_data(llama_client_slot* &slot, json data)
     {
         slot_params default_params;
-        common_sampler_params default_sparams;
+        common_params_sampling default_sparams;
 
         slot->params.stream = json_value(data, "stream", false);
         slot->params.cache_prompt = json_value(data, "cache_prompt", false);