diff --git a/Makefile b/Makefile
index be4971d621dc..bc54f319b924 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=cce5a9007572c6e9fa522296b77571d2e5071357
+CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445
 
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index d21735daa396..7b75de5bd0cf 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -203,7 +203,7 @@ struct llama_client_slot
     std::string stopping_word;
 
     // sampling
-    struct common_sampler_params sparams;
+    struct common_params_sampling sparams;
     common_sampler *ctx_sampling = nullptr;
 
     int32_t ga_i = 0; // group-attention state
@@ -662,7 +662,7 @@ struct llama_server_context
     bool launch_slot_with_data(llama_client_slot* &slot, json data)
     {
         slot_params default_params;
-        common_sampler_params default_sparams;
+        common_params_sampling default_sparams;
 
         slot->params.stream = json_value(data, "stream", false);
         slot->params.cache_prompt = json_value(data, "cache_prompt", false);