From b5a353c26c040fd26d763485482ee7ff4544c3ca Mon Sep 17 00:00:00 2001 From: thxCode Date: Tue, 16 Jul 2024 13:12:31 +0800 Subject: [PATCH] refactor: update params Signed-off-by: thxCode --- llama-box/param.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llama-box/param.hpp b/llama-box/param.hpp index c0fcb7f..81c298a 100644 --- a/llama-box/param.hpp +++ b/llama-box/param.hpp @@ -155,6 +155,7 @@ static void llama_box_params_print_usage(int, char **argv, const llama_box_param opts.push_back({ "*", "-dt, --defrag-thold N", "KV cache defragmentation threshold (default: %.1f, < 0 - disabled)", (double)params.defrag_thold }); opts.push_back({ "*", "-np, --parallel N", "number of parallel sequences to decode (default: %d)", params.n_parallel }); opts.push_back({ "*", "-cb, --cont-batching", "enable continuous batching (a.k.a dynamic batching) (default: %s)", params.cont_batching ? "enabled" : "disabled" }); + opts.push_back({ "*", "-nocb, --no-cont-batching", "disable continuous batching" }); opts.push_back({ "*", " --mmproj FILE", "path to a multimodal projector file for LLaVA" }); if (llama_supports_mlock()) { opts.push_back({ "*", " --mlock", "force system to keep model in RAM rather than swapping or compressing" }); @@ -777,6 +778,11 @@ static bool llama_box_params_parse(int argc, char **argv, llama_box_params &bpar continue; } + if (!strcmp(flag, "-nocb") || !strcmp(flag, "--no-cont-batching")) { + bparams.gparams.cont_batching = false; + continue; + } + if (!strcmp(flag, "--mmproj")) { if (i == argc) { missing("--mmproj");