Skip to content

Commit

Permalink
refactor: --rpc param
Browse files Browse the repository at this point in the history
Signed-off-by: thxCode <thxcode0824@gmail.com>
  • Loading branch information
thxCode committed Aug 21, 2024
1 parent da4d696 commit 28be1bb
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 19 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,8 @@ general:
--frequency-penalty N repeat alpha frequency penalty (default: 0.0, 0.0 = disabled)
--dynatemp-range N dynamic temperature range (default: 0.0, 0.0 = disabled)
--dynatemp-exp N dynamic temperature exponent (default: 1.0)
--mirostat N use Mirostat sampling.
Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.
--mirostat N use Mirostat sampling,
Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used
(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)
--mirostat-lr N Mirostat learning rate, parameter eta (default: 0.1)
--mirostat-ent N Mirostat target entropy, parameter tau (default: 5.0)
Expand Down Expand Up @@ -180,7 +180,6 @@ general:
-cb, --cont-batching enable continuous batching (a.k.a. dynamic batching) (default: enabled)
-nocb, --no-cont-batching disable continuous batching
--mmproj FILE path to a multimodal projector file for LLaVA
--rpc SERVERS comma separated list of RPC servers
--mlock force system to keep model in RAM rather than swapping or compressing
--no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)
--numa TYPE attempt optimizations that help on some NUMA systems
Expand All @@ -202,7 +201,7 @@ general:
add a control vector with user defined scaling SCALE
--control-vector-layer-range START END
layer range to apply the control vector(s) to, start and end inclusive
--spm-infill use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this. (default: disabled)
--spm-infill use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this (default: disabled)
-sp, --special special tokens output enabled (default: false)
-ngl, --gpu-layers N number of layers to store in VRAM
-sm, --split-mode SPLIT_MODE how to split the model across multiple GPUs, one of:
Expand Down Expand Up @@ -238,7 +237,8 @@ server:
--conn-idle N server connection idle in seconds (default: 60)
--conn-keepalive N server connection keep-alive in seconds (default: 15)
-tps, --tokens-per-second N maximum number of tokens per second (default: 0, 0 = disabled, -1 = try to detect)
when enabled, limit the request within its X-Request-Tokens-Per-Second HTTP header.
when enabled, limit the request within its X-Request-Tokens-Per-Second HTTP header
--rpc SERVERS comma separated list of RPC servers
logging:
Expand Down
28 changes: 14 additions & 14 deletions llama-box/param.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ static void llama_box_params_print_usage(int, char **argv, const llama_box_param
opts.push_back({ "*", " --frequency-penalty N", "repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)", (double)sparams.penalty_freq });
opts.push_back({ "*", " --dynatemp-range N", "dynamic temperature range (default: %.1f, 0.0 = disabled)", (double)sparams.dynatemp_range });
opts.push_back({ "*", " --dynatemp-exp N", "dynamic temperature exponent (default: %.1f)", (double)sparams.dynatemp_exponent });
opts.push_back({ "*", " --mirostat N", "use Mirostat sampling.\n"
"Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n"
opts.push_back({ "*", " --mirostat N", "use Mirostat sampling,\n"
"Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used\n"
"(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", sparams.mirostat });
opts.push_back({ "*", " --mirostat-lr N", "Mirostat learning rate, parameter eta (default: %.1f)", (double)sparams.mirostat_eta });
opts.push_back({ "*", " --mirostat-ent N", "Mirostat target entropy, parameter tau (default: %.1f)", (double)sparams.mirostat_tau });
Expand Down Expand Up @@ -156,7 +156,6 @@ static void llama_box_params_print_usage(int, char **argv, const llama_box_param
opts.push_back({ "*", "-cb, --cont-batching", "enable continuous batching (a.k.a. dynamic batching) (default: %s)", params.cont_batching ? "enabled" : "disabled" });
opts.push_back({ "*", "-nocb, --no-cont-batching", "disable continuous batching" });
opts.push_back({ "*", " --mmproj FILE", "path to a multimodal projector file for LLaVA" });
opts.push_back({ "*", " --rpc SERVERS", "comma separated list of RPC servers" });
if (llama_supports_mlock()) {
opts.push_back({ "*", " --mlock", "force system to keep model in RAM rather than swapping or compressing" });
}
Expand All @@ -182,7 +181,7 @@ static void llama_box_params_print_usage(int, char **argv, const llama_box_param
"add a control vector with user defined scaling SCALE" });
opts.push_back({ "*", " --control-vector-layer-range START END",
"layer range to apply the control vector(s) to, start and end inclusive" });
opts.push_back({ "*", " --spm-infill", "use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this. (default: %s)", params.spm_infill ? "enabled" : "disabled" });
opts.push_back({ "*", " --spm-infill", "use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this (default: %s)", params.spm_infill ? "enabled" : "disabled" });
opts.push_back({ "*", "-sp, --special", "special tokens output enabled (default: %s)", params.special ? "true" : "false" });
if (llama_supports_gpu_offload()) {
opts.push_back({ "*", "-ngl, --gpu-layers N", "number of layers to store in VRAM" });
Expand Down Expand Up @@ -220,7 +219,8 @@ static void llama_box_params_print_usage(int, char **argv, const llama_box_param
opts.push_back({ "server", " --conn-idle N", "server connection idle in seconds (default: %d)", bparams.conn_idle });
opts.push_back({ "server", " --conn-keepalive N", "server connection keep-alive in seconds (default: %d)", bparams.conn_keepalive });
opts.push_back({ "server", "-tps, --tokens-per-second N", "maximum number of tokens per second (default: %d, 0 = disabled, -1 = try to detect)\n"
"when enabled, limit the request within its X-Request-Tokens-Per-Second HTTP header.", bparams.n_tps });
"when enabled, limit the request within its X-Request-Tokens-Per-Second HTTP header", bparams.n_tps });
opts.push_back({ "server", " --rpc SERVERS", "comma separated list of RPC servers" });

opts.push_back({ "logging" });
opts.push_back({ "logging", " --log-format {text,json}",
Expand Down Expand Up @@ -834,15 +834,6 @@ static bool llama_box_params_parse(int argc, char **argv, llama_box_params &bpar
continue;
}

if (!strcmp(flag, "--rpc")) {
if (i == argc) {
missing("--rpc");
}
char *arg = argv[i++];
bparams.gparams.rpc_servers = arg;
continue;
}

if (llama_supports_mlock()) {
if (!strcmp(flag, "--mlock")) {
bparams.gparams.use_mlock = true;
Expand Down Expand Up @@ -1189,6 +1180,15 @@ static bool llama_box_params_parse(int argc, char **argv, llama_box_params &bpar
continue;
}

if (!strcmp(flag, "--rpc")) {
if (i == argc) {
missing("--rpc");
}
char *arg = argv[i++];
bparams.gparams.rpc_servers = arg;
continue;
}

// logging flags

if (!strcmp(flag, "--log-format")) {
Expand Down

0 comments on commit 28be1bb

Please sign in to comment.