From b5a353c26c040fd26d763485482ee7ff4544c3ca Mon Sep 17 00:00:00 2001
From: thxCode <thxcode0824@gmail.com>
Date: Tue, 16 Jul 2024 13:12:31 +0800
Subject: [PATCH] refactor: update params

Signed-off-by: thxCode <thxcode0824@gmail.com>
---
 llama-box/param.hpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llama-box/param.hpp b/llama-box/param.hpp
index c0fcb7f..81c298a 100644
--- a/llama-box/param.hpp
+++ b/llama-box/param.hpp
@@ -155,6 +155,7 @@ static void llama_box_params_print_usage(int, char **argv, const llama_box_param
     opts.push_back({ "*",           "-dt,   --defrag-thold N",       "KV cache defragmentation threshold (default: %.1f, < 0 - disabled)", (double)params.defrag_thold });
     opts.push_back({ "*",           "-np,   --parallel N",           "number of parallel sequences to decode (default: %d)", params.n_parallel });
     opts.push_back({ "*",           "-cb,   --cont-batching",        "enable continuous batching (a.k.a dynamic batching) (default: %s)", params.cont_batching ? "enabled" : "disabled" });
+    opts.push_back({ "*",           "-nocb, --no-cont-batching",     "disable continuous batching" });
     opts.push_back({ "*",           "       --mmproj FILE",          "path to a multimodal projector file for LLaVA" });
     if (llama_supports_mlock()) {
         opts.push_back({ "*",           "       --mlock",                "force system to keep model in RAM rather than swapping or compressing" });
@@ -777,6 +778,11 @@ static bool llama_box_params_parse(int argc, char **argv, llama_box_params &bpar
                 continue;
             }
 
+            if (!strcmp(flag, "-nocb") || !strcmp(flag, "--no-cont-batching")) {
+                bparams.gparams.cont_batching = false;
+                continue;
+            }
+
             if (!strcmp(flag, "--mmproj")) {
                 if (i == argc) {
                     missing("--mmproj");