From 141eb5107f37b9d054e9c1c71e1b8ad2f6773d41 Mon Sep 17 00:00:00 2001
From: z5269887
Date: Mon, 22 Apr 2024 23:38:09 +0800
Subject: [PATCH] Update llama_model_quantize_params

---
 llama.cpp | 1 +
 llama.h   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 6e40c60109688..550fc67351adc 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -14184,6 +14184,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
         /*.quantize_output_tensor    =*/ true,
         /*.only_copy                 =*/ false,
         /*.pure                      =*/ false,
+        /*.keep_split                =*/ false,
         /*.imatrix                   =*/ nullptr,
         /*.kv_overrides              =*/ nullptr,
     };
diff --git a/llama.h b/llama.h
index efe1f1f57f440..5fed561fc064e 100644
--- a/llama.h
+++ b/llama.h
@@ -288,9 +288,9 @@ extern "C" {
         bool quantize_output_tensor; // quantize output.weight
         bool only_copy;              // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
         bool pure;                   // quantize all tensors to the default type
+        bool keep_split;             // quantize to the same number of shards
         void * imatrix;              // pointer to importance matrix data
         void * kv_overrides;         // pointer to vector containing overrides
-        bool keep_split;             // quantize to the same number of shards
     } llama_model_quantize_params;

     // grammar types