From 141eb5107f37b9d054e9c1c71e1b8ad2f6773d41 Mon Sep 17 00:00:00 2001
From: z5269887
Date: Mon, 22 Apr 2024 23:38:09 +0800
Subject: [PATCH] Update llama_model_quantize_params

---
 llama.cpp | 1 +
 llama.h   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 6e40c60109688..550fc67351adc 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -14184,6 +14184,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
         /*.quantize_output_tensor    =*/ true,
         /*.only_copy                 =*/ false,
         /*.pure                      =*/ false,
+        /*.keep_split                =*/ false,
         /*.imatrix                   =*/ nullptr,
         /*.kv_overrides              =*/ nullptr,
     };
diff --git a/llama.h b/llama.h
index efe1f1f57f440..5fed561fc064e 100644
--- a/llama.h
+++ b/llama.h
@@ -288,9 +288,9 @@ extern "C" {
         bool quantize_output_tensor; // quantize output.weight
         bool only_copy;              // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
         bool pure;                   // quantize all tensors to the default type
+        bool keep_split;             // quantize to the same number of shards
         void * imatrix;              // pointer to importance matrix data
         void * kv_overrides;         // pointer to vector containing overrides
-        bool keep_split;             // quantize to the same number of shards
     } llama_model_quantize_params;

     // grammar types