diff --git a/examples/quantize/test.sh b/examples/quantize/test.sh
index fe64f09757d37..840f712accfda 100644
--- a/examples/quantize/test.sh
+++ b/examples/quantize/test.sh
@@ -42,7 +42,7 @@ echo PASS
 echo
 
 # 3. Requant model with '--keep_split'
-$QUANTIZE --allow-requantize --keep_split $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant.gguf Q4_K 
+$QUANTIZE --allow-requantize --keep_split $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant.gguf Q4_K
 echo PASS
 echo
 
@@ -52,7 +52,7 @@ echo PASS
 echo
 
 # 4. Requant mode without '--keep_split'
-$QUANTIZE --allow-requantize $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant-merge.gguf Q4_K 
+$QUANTIZE --allow-requantize $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant-merge.gguf Q4_K
 echo PASS
 echo
 
@@ -62,4 +62,4 @@ echo PASS
 echo
 
 # Clean up
-rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-requant*.gguf
\ No newline at end of file
+rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-requant*.gguf
diff --git a/llama.cpp b/llama.cpp
index 550fc67351adc..a7f5da88dddf8 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13618,13 +13618,13 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
     // Set split info if needed
     if (n_split > 1) {
-        for (int i = 0; i < ctx_outs.size(); ++i) {
+        for (size_t i = 0; i < ctx_outs.size(); ++i) {
             gguf_set_val_u16(ctx_outs[i], ml.llm_kv(LLM_KV_SPLIT_NO).c_str(), i);
             gguf_set_val_u16(ctx_outs[i], ml.llm_kv(LLM_KV_SPLIT_COUNT).c_str(), n_split);
             gguf_set_val_i32(ctx_outs[i], ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str(), ml.n_tensors);
         }
     }
-    
+
     int cur_split = -1;
     std::ofstream fout;
     auto close_ofstream = [&]() {
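Note on the `llama.cpp` hunk: the loop index changes from `int` to `size_t` because `std::vector::size()` returns the unsigned type `size_t`, and comparing it against a signed `int` triggers `-Wsign-compare` warnings. A minimal standalone sketch of the pattern (the `items` vector and the output are illustrative, not from the patch):

```cpp
#include <cstdio>
#include <vector>

int main() {
    std::vector<int> items = {10, 20, 30};

    // std::vector<T>::size() returns size_t (unsigned), so the index is
    // declared size_t as well; an `int` index here would provoke
    // -Wsign-compare under -Wall/-Wextra.
    for (size_t i = 0; i < items.size(); ++i) {
        std::printf("items[%zu] = %d\n", i, items[i]);
    }
    return 0;
}
```

An alternative pattern is an explicit cast in the condition (`i < (int) items.size()`); widening the index type to `size_t`, as the patch does, avoids the cast entirely. The remaining hunks only strip trailing whitespace and add the missing newline at the end of `test.sh`.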