From 36556496aa4c5fd04849ff050edbb14ce6ca6a28 Mon Sep 17 00:00:00 2001 From: Vlad Lasky Date: Mon, 28 Oct 2024 16:47:12 +1100 Subject: [PATCH 1/2] Support for configurable URL prefix in llamafiler Added support for new llamafiler commandline parameter --url-prefix or -u. This specifies a URL prefix (subdirectory) under which the llamafiler HTTP server will make the API accessible, e.g. /llamafiler. Useful when running llamafiler behind a reverse proxy such as NGINX or Redbean. By default, this is set to / (root). --- llamafile/flags.cpp | 8 ++++++++ llamafile/llamafile.h | 1 + llamafile/server/client.cpp | 27 +++++++++++++++++++++++---- llamafile/server/listen.cpp | 10 ++++------ llamafile/server/main.1 | 5 +++++ llamafile/server/main.1.asc | 6 ++++++ llamafile/server/main.cpp | 7 ++++++- llamafile/server/server.h | 2 ++ llamafile/server/utils.h | 19 +++++++++++++++++++ 9 files changed, 74 insertions(+), 11 deletions(-) diff --git a/llamafile/flags.cpp b/llamafile/flags.cpp index 05ebb309c2..98e43edfc8 100644 --- a/llamafile/flags.cpp +++ b/llamafile/flags.cpp @@ -46,6 +46,7 @@ bool FLAG_unsecure = false; const char *FLAG_file = nullptr; const char *FLAG_ip_header = nullptr; const char *FLAG_listen = "0.0.0.0:8080"; +const char *FLAG_url_prefix = nullptr; const char *FLAG_model = nullptr; const char *FLAG_prompt = nullptr; double FLAG_token_rate = 1; @@ -137,6 +138,13 @@ void llamafile_get_flags(int argc, char **argv) { continue; } + if (!strcmp(flag, "-u") || !strcmp(flag, "--url-prefix")) { + if (i == argc) + missing("--url-prefix"); + FLAG_url_prefix = argv[i++]; + continue; + } + if (!strcmp(flag, "-k") || !strcmp(flag, "--keepalive")) { if (i == argc) missing("--keepalive"); diff --git a/llamafile/llamafile.h b/llamafile/llamafile.h index af48025126..006a751a59 100644 --- a/llamafile/llamafile.h +++ b/llamafile/llamafile.h @@ -22,6 +22,7 @@ extern bool FLAG_unsecure; extern const char *FLAG_file; extern const char *FLAG_ip_header; extern const char *FLAG_listen; 
+extern const char *FLAG_url_prefix; extern const char *FLAG_model; extern const char *FLAG_prompt; extern double FLAG_token_rate; diff --git a/llamafile/server/client.cpp b/llamafile/server/client.cpp index 50fc630e4a..6c3caad81c 100644 --- a/llamafile/server/client.cpp +++ b/llamafile/server/client.cpp @@ -583,13 +583,32 @@ Client::dispatch() bool Client::dispatcher() { - if (path() == "/tokenize") + ctl::string_view p = path(); + + if (!g_url_prefix.empty()) { + if (FLAG_verbose >= 2) { + SLOG("request path %.*s", (int)p.size(), p.data()); + } + + size_t prefix_len = g_url_prefix.size(); + if (p.size() < prefix_len || + memcmp(p.data(), g_url_prefix.c_str(), prefix_len) != 0) { + SLOG("path prefix mismatch"); + return send_error(404); + } + + // Adjust path view to exclude prefix + p = ctl::string_view(p.data() + prefix_len, + p.size() - prefix_len); + } + + if (p == "/tokenize") return tokenize(); - if (path() == "/embedding") + if (p == "/embedding") return embedding(); - if (path() == "/v1/embeddings") + if (p == "/v1/embeddings") return embedding(); - if (path() == "/completion") + if (p == "/completion") return completion(); return send_error(404); } diff --git a/llamafile/server/listen.cpp b/llamafile/server/listen.cpp index 4f22b1f49e..86b8df0015 100644 --- a/llamafile/server/listen.cpp +++ b/llamafile/server/listen.cpp @@ -26,16 +26,14 @@ #include #include "log.h" +#include "server.h" void print_listening_url(unsigned ip, int port) { - SLOG("listen http://%hhu.%hhu.%hhu.%hhu:%hu", - ip >> 24, - ip >> 16, - ip >> 8, - ip, - port); + SLOG("listen http://%hhu.%hhu.%hhu.%hhu:%hu%s", + ip >> 24, ip >> 16, ip >> 8, ip, port, + g_url_prefix.c_str()); } int diff --git a/llamafile/server/main.1 b/llamafile/server/main.1 index f711a3a063..87b155a743 100644 --- a/llamafile/server/main.1 +++ b/llamafile/server/main.1 @@ -33,6 +33,11 @@ Specifies the local [HOST:]PORT on which the HTTP server should listen. 
By default this is 0.0.0.0:8080 which means llamafiler will bind to port 8080 on every locally available IPv4 network interface. This option may currently only be specified once. +.It Fl u Ar URLPREFIX , Fl Fl url-prefix Ar URLPREFIX +Specifies a URL prefix (subdirectory) under which the HTTP server will +make the API accessible, e.g. /lamafiler. Useful when running llamafiler +behind a reverse proxy such as NGINX or Redbean. By default, this is set +to / (root). .It Fl w Ar N , Fl Fl workers Ar N Number of HTTP client handling threads. .It Fl Fl trust Ar CIDR diff --git a/llamafile/server/main.1.asc b/llamafile/server/main.1.asc index a9d441677e..984a7329e8 100644 --- a/llamafile/server/main.1.asc +++ b/llamafile/server/main.1.asc @@ -35,6 +35,12 @@ will bind to port 8080 on every locally available IPv4 network interface. This option may currently only be specified once. + -u URLPREFIX, --url-prefix URLPREFIX + Specifies a URL prefix (subdirectory) under which the HTTP + server will make the API accessible, e.g. /lamafiler. + Useful when running llamafiler behind a reverse proxy such as + NGINX or Redbean. By default, this is set to / (root). + -w N, --workers N Number of HTTP client handling threads. 
diff --git a/llamafile/server/main.cpp b/llamafile/server/main.cpp index e18410531d..080b675fce 100644 --- a/llamafile/server/main.cpp +++ b/llamafile/server/main.cpp @@ -29,9 +29,11 @@ #include "signals.h" #include "time.h" #include "tokenbucket.h" +#include "utils.h" Server* g_server; llama_model* g_model; +ctl::string g_url_prefix; int main(int argc, char* argv[]) @@ -58,6 +60,9 @@ main(int argc, char* argv[]) LoadZipArgs(&argc, &argv); llamafile_get_flags(argc, argv); + // normalize URL prefix + g_url_prefix = normalize_url_prefix(FLAG_url_prefix); + // initialize subsystems time_init(); tokenbucket_init(); @@ -125,4 +130,4 @@ main(int argc, char* argv[]) while (!pthread_orphan_np()) pthread_decimate_np(); CheckForMemoryLeaks(); -} +} \ No newline at end of file diff --git a/llamafile/server/server.h b/llamafile/server/server.h index e9b09ff809..b5fe4a4d06 100644 --- a/llamafile/server/server.h +++ b/llamafile/server/server.h @@ -19,6 +19,7 @@ #include #include #include +#include struct Server { @@ -46,6 +47,7 @@ struct Server }; extern Server* g_server; +extern ctl::string g_url_prefix; int create_listening_socket(const char*); diff --git a/llamafile/server/utils.h b/llamafile/server/utils.h index 6daeb7134a..4435138da4 100644 --- a/llamafile/server/utils.h +++ b/llamafile/server/utils.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include extern const signed char kHexToInt[256]; @@ -31,3 +32,21 @@ or_empty(ctl::optional x) return x.value(); return {}; } + +static inline ctl::string normalize_url_prefix(ctl::string url_prefix) { + // Rule 1: Replace multiple slashes with single slash + while (url_prefix.find("//") != ctl::string::npos) { + url_prefix.replace(url_prefix.find("//"), 2, "/"); + } + + // Rule 2: Remove trailing slash + if (!url_prefix.empty() && url_prefix.back() == '/') { + url_prefix.pop_back(); + } + + // Rule 3: Convert single slash to empty string + if (url_prefix == "/") { + url_prefix.clear(); + } + return url_prefix; +} \ No newline 
at end of file From fb352f3165f4d7932a2cf5f1b85ffa8e54b9519e Mon Sep 17 00:00:00 2001 From: Vlad Lasky Date: Mon, 28 Oct 2024 18:15:40 +1100 Subject: [PATCH 2/2] Removed short form of url prefix commandline option (-u) --- llamafile/flags.cpp | 2 +- llamafile/server/main.1 | 2 +- llamafile/server/main.1.asc | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llamafile/flags.cpp b/llamafile/flags.cpp index 98e43edfc8..edc2a07034 100644 --- a/llamafile/flags.cpp +++ b/llamafile/flags.cpp @@ -138,7 +138,7 @@ void llamafile_get_flags(int argc, char **argv) { continue; } - if (!strcmp(flag, "-u") || !strcmp(flag, "--url-prefix")) { + if (!strcmp(flag, "--url-prefix")) { if (i == argc) missing("--url-prefix"); FLAG_url_prefix = argv[i++]; diff --git a/llamafile/server/main.1 b/llamafile/server/main.1 index 87b155a743..072a118eb6 100644 --- a/llamafile/server/main.1 +++ b/llamafile/server/main.1 @@ -33,7 +33,7 @@ Specifies the local [HOST:]PORT on which the HTTP server should listen. By default this is 0.0.0.0:8080 which means llamafiler will bind to port 8080 on every locally available IPv4 network interface. This option may currently only be specified once. -.It Fl u Ar URLPREFIX , Fl Fl url-prefix Ar URLPREFIX +.It Fl Fl url-prefix Ar URLPREFIX Specifies a URL prefix (subdirectory) under which the HTTP server will make the API accessible, e.g. /lamafiler. Useful when running llamafiler behind a reverse proxy such as NGINX or Redbean. By default, this is set diff --git a/llamafile/server/main.1.asc b/llamafile/server/main.1.asc index 984a7329e8..1916f3d7c6 100644 --- a/llamafile/server/main.1.asc +++ b/llamafile/server/main.1.asc @@ -35,11 +35,11 @@ will bind to port 8080 on every locally available IPv4 network interface. This option may currently only be specified once. - -u URLPREFIX, --url-prefix URLPREFIX - Specifies a URL prefix (subdirectory) under which the HTTP - server will make the API accessible, e.g. /lamafiler. 
- Useful when running llamafiler behind a reverse proxy such as - NGINX or Redbean. By default, this is set to / (root). + --url-prefix URLPREFIX + Specifies a URL prefix (subdirectory) under which the HTTP + server will make the API accessible, e.g. /lamafiler. Useful + when running llamafiler behind a reverse proxy such as NGINX or + Redbean. By default, this is set to / (root). -w N, --workers N Number of HTTP client handling threads.