diff --git a/llamafile/flags.cpp b/llamafile/flags.cpp index f06bc69465..a65af6f7f7 100644 --- a/llamafile/flags.cpp +++ b/llamafile/flags.cpp @@ -47,6 +47,7 @@ bool FLAG_unsecure = false; const char *FLAG_file = nullptr; const char *FLAG_ip_header = nullptr; const char *FLAG_listen = "0.0.0.0:8080"; +const char *FLAG_url_prefix = nullptr; const char *FLAG_model = nullptr; const char *FLAG_prompt = nullptr; double FLAG_token_rate = 1; @@ -138,6 +139,13 @@ void llamafile_get_flags(int argc, char **argv) { continue; } + if (!strcmp(flag, "--url-prefix")) { + if (i == argc) + missing("--url-prefix"); + FLAG_url_prefix = argv[i++]; + continue; + } + if (!strcmp(flag, "-k") || !strcmp(flag, "--keepalive")) { if (i == argc) missing("--keepalive"); diff --git a/llamafile/llamafile.h b/llamafile/llamafile.h index e63d79c5c6..dc817bd425 100644 --- a/llamafile/llamafile.h +++ b/llamafile/llamafile.h @@ -23,6 +23,7 @@ extern bool FLAG_unsecure; extern const char *FLAG_file; extern const char *FLAG_ip_header; extern const char *FLAG_listen; +extern const char *FLAG_url_prefix; extern const char *FLAG_model; extern const char *FLAG_prompt; extern double FLAG_token_rate; diff --git a/llamafile/server/client.cpp b/llamafile/server/client.cpp index 50fc630e4a..6c3caad81c 100644 --- a/llamafile/server/client.cpp +++ b/llamafile/server/client.cpp @@ -583,13 +583,32 @@ Client::dispatch() bool Client::dispatcher() { - if (path() == "/tokenize") + ctl::string_view p = path(); + + if (!g_url_prefix.empty()) { + if (FLAG_verbose >= 2) { + SLOG("request path %.*s", (int)p.size(), p.data()); + } + + size_t prefix_len = g_url_prefix.size(); + if (p.size() < prefix_len || + memcmp(p.data(), g_url_prefix.c_str(), prefix_len) != 0) { + SLOG("path prefix mismatch"); + return send_error(404); + } + + // Adjust path view to exclude prefix + p = ctl::string_view(p.data() + prefix_len, + p.size() - prefix_len); + } + + if (p == "/tokenize") return tokenize(); - if (path() == "/embedding") + if (p == "/embedding") return embedding(); - if (path() == "/v1/embeddings") + if (p == "/v1/embeddings") return embedding(); - if (path() == "/completion") + if (p == "/completion") return completion(); return send_error(404); } diff --git a/llamafile/server/listen.cpp b/llamafile/server/listen.cpp index 4f22b1f49e..86b8df0015 100644 --- a/llamafile/server/listen.cpp +++ b/llamafile/server/listen.cpp @@ -26,16 +26,14 @@ #include #include "log.h" +#include "server.h" void print_listening_url(unsigned ip, int port) { - SLOG("listen http://%hhu.%hhu.%hhu.%hhu:%hu", - ip >> 24, - ip >> 16, - ip >> 8, - ip, - port); + SLOG("listen http://%hhu.%hhu.%hhu.%hhu:%hu%s", + ip >> 24, ip >> 16, ip >> 8, ip, port, + g_url_prefix.c_str()); } int diff --git a/llamafile/server/main.1 b/llamafile/server/main.1 index f711a3a063..072a118eb6 100644 --- a/llamafile/server/main.1 +++ b/llamafile/server/main.1 @@ -33,6 +33,11 @@ Specifies the local [HOST:]PORT on which the HTTP server should listen. By default this is 0.0.0.0:8080 which means llamafiler will bind to port 8080 on every locally available IPv4 network interface. This option may currently only be specified once. +.It Fl Fl url-prefix Ar URLPREFIX +Specifies a URL prefix (subdirectory) under which the HTTP server will +make the API accessible, e.g. /lamafiler. Useful when running llamafiler +behind a reverse proxy such as NGINX or Redbean. By default, this is set +to / (root). .It Fl w Ar N , Fl Fl workers Ar N Number of HTTP client handling threads. .It Fl Fl trust Ar CIDR diff --git a/llamafile/server/main.1.asc b/llamafile/server/main.1.asc index a9d441677e..1916f3d7c6 100644 --- a/llamafile/server/main.1.asc +++ b/llamafile/server/main.1.asc @@ -35,6 +35,12 @@ will bind to port 8080 on every locally available IPv4 network interface. This option may currently only be specified once. + --url-prefix URLPREFIX + Specifies a URL prefix (subdirectory) under which the HTTP + server will make the API accessible, e.g. /lamafiler. Useful + when running llamafiler behind a reverse proxy such as NGINX or + Redbean. By default, this is set to / (root). + -w N, --workers N Number of HTTP client handling threads. diff --git a/llamafile/server/main.cpp b/llamafile/server/main.cpp index e18410531d..080b675fce 100644 --- a/llamafile/server/main.cpp +++ b/llamafile/server/main.cpp @@ -29,9 +29,11 @@ #include "signals.h" #include "time.h" #include "tokenbucket.h" +#include "utils.h" Server* g_server; llama_model* g_model; +ctl::string g_url_prefix; int main(int argc, char* argv[]) @@ -58,6 +60,9 @@ main(int argc, char* argv[]) LoadZipArgs(&argc, &argv); llamafile_get_flags(argc, argv); + // normalize URL prefix + g_url_prefix = normalize_url_prefix(FLAG_url_prefix); + // initialize subsystems time_init(); tokenbucket_init(); @@ -125,4 +130,4 @@ main(int argc, char* argv[]) while (!pthread_orphan_np()) pthread_decimate_np(); CheckForMemoryLeaks(); -} +} \ No newline at end of file diff --git a/llamafile/server/server.h b/llamafile/server/server.h index e9b09ff809..b5fe4a4d06 100644 --- a/llamafile/server/server.h +++ b/llamafile/server/server.h @@ -19,6 +19,7 @@ #include #include #include +#include struct Server { @@ -46,6 +47,7 @@ struct Server }; extern Server* g_server; +extern ctl::string g_url_prefix; int create_listening_socket(const char*); diff --git a/llamafile/server/utils.h b/llamafile/server/utils.h index 6daeb7134a..4435138da4 100644 --- a/llamafile/server/utils.h +++ b/llamafile/server/utils.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include extern const signed char kHexToInt[256]; @@ -31,3 +32,21 @@ or_empty(ctl::optional x) return x.value(); return {}; } + +static inline ctl::string normalize_url_prefix(ctl::string url_prefix) { + // Rule 1: Replace multiple slashes with single slash + while (url_prefix.find("//") != ctl::string::npos) { + url_prefix.replace(url_prefix.find("//"), 2, "/"); + } + + // Rule 2: Remove trailing slash + if (!url_prefix.empty() && url_prefix.back() == '/') { + url_prefix.pop_back(); + } + + // Rule 3: Convert single slash to empty string + if (url_prefix == "/") { + url_prefix.clear(); + } + return url_prefix; +} \ No newline at end of file