Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for configurable URL prefix in llamafiler #604

Merged
merged 3 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions llamafile/flags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ bool FLAG_unsecure = false;
const char *FLAG_file = nullptr;
const char *FLAG_ip_header = nullptr;
const char *FLAG_listen = "0.0.0.0:8080";
const char *FLAG_url_prefix = nullptr;
const char *FLAG_model = nullptr;
const char *FLAG_prompt = nullptr;
double FLAG_token_rate = 1;
Expand Down Expand Up @@ -138,6 +139,13 @@ void llamafile_get_flags(int argc, char **argv) {
continue;
}

if (!strcmp(flag, "--url-prefix")) {
if (i == argc)
missing("--url-prefix");
FLAG_url_prefix = argv[i++];
continue;
}

if (!strcmp(flag, "-k") || !strcmp(flag, "--keepalive")) {
if (i == argc)
missing("--keepalive");
Expand Down
1 change: 1 addition & 0 deletions llamafile/llamafile.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ extern bool FLAG_unsecure;
extern const char *FLAG_file;
extern const char *FLAG_ip_header;
extern const char *FLAG_listen;
extern const char *FLAG_url_prefix;
extern const char *FLAG_model;
extern const char *FLAG_prompt;
extern double FLAG_token_rate;
Expand Down
27 changes: 23 additions & 4 deletions llamafile/server/client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,13 +583,32 @@ Client::dispatch()
// Routes the current request to a handler based on its path.
//
// When g_url_prefix is non-empty, the request path must begin with that
// prefix; the prefix is then dropped from the view before the route
// comparisons run. A path that lacks the prefix, or that matches none
// of the routes below, is answered with send_error(404).
//
// NOTE(review): this listing interleaves the earlier `path() == ...`
// comparisons with the `p == ...` comparisons that supersede them.
bool
Client::dispatcher()
{
if (path() == "/tokenize")
ctl::string_view p = path();

if (!g_url_prefix.empty()) {
// Only log the raw request path at verbosity level 2 and above.
if (FLAG_verbose >= 2) {
SLOG("request path %.*s", (int)p.size(), p.data());
}

size_t prefix_len = g_url_prefix.size();
// The size check comes first so memcmp never reads past the end of p.
if (p.size() < prefix_len ||
memcmp(p.data(), g_url_prefix.c_str(), prefix_len) != 0) {
SLOG("path prefix mismatch");
return send_error(404);
}

// Adjust path view to exclude prefix
p = ctl::string_view(p.data() + prefix_len,
p.size() - prefix_len);
}

// Exact-match routing on the (possibly prefix-stripped) path.
if (p == "/tokenize")
return tokenize();
if (path() == "/embedding")
if (p == "/embedding")
return embedding();
if (path() == "/v1/embeddings")
if (p == "/v1/embeddings")
return embedding();
if (path() == "/completion")
if (p == "/completion")
return completion();
// No route matched.
return send_error(404);
}
Expand Down
10 changes: 4 additions & 6 deletions llamafile/server/listen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,14 @@
#include <sys/socket.h>

#include "log.h"
#include "server.h"

// Logs the URL the server is listening on.
//
// `ip` is split into four bytes, most significant first (ip >> 24 down
// to ip), each printed with %hhu, followed by `port`. The second SLOG
// call additionally appends g_url_prefix after the port.
//
// NOTE(review): this listing shows the earlier SLOG call (without the
// prefix) immediately followed by the call that supersedes it.
// Presumably SLOG accepts printf-style format strings -- confirm in log.h.
void
print_listening_url(unsigned ip, int port)
{
SLOG("listen http://%hhu.%hhu.%hhu.%hhu:%hu",
ip >> 24,
ip >> 16,
ip >> 8,
ip,
port);
SLOG("listen http://%hhu.%hhu.%hhu.%hhu:%hu%s",
ip >> 24, ip >> 16, ip >> 8, ip, port,
g_url_prefix.c_str());
}

int
Expand Down
5 changes: 5 additions & 0 deletions llamafile/server/main.1
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ Specifies the local [HOST:]PORT on which the HTTP server should listen.
By default this is 0.0.0.0:8080 which means llamafiler will bind to port
8080 on every locally available IPv4 network interface. This option may
currently only be specified once.
.It Fl Fl url-prefix Ar URLPREFIX
Specifies a URL prefix (subdirectory) under which the HTTP server will
make the API accessible, e.g. /llamafiler. Useful when running llamafiler
behind a reverse proxy such as NGINX or Redbean. By default, this is set
to / (root).
.It Fl w Ar N , Fl Fl workers Ar N
Number of HTTP client handling threads.
.It Fl Fl trust Ar CIDR
Expand Down
6 changes: 6 additions & 0 deletions llamafile/server/main.1.asc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@
will bind to port 8080 on every locally available IPv4 network
interface. This option may currently only be specified once.

--url-prefix URLPREFIX
Specifies a URL prefix (subdirectory) under which the HTTP
server will make the API accessible, e.g. /llamafiler. Useful
when running llamafiler behind a reverse proxy such as NGINX or
Redbean. By default, this is set to / (root).

-w N, --workers N
Number of HTTP client handling threads.

Expand Down
7 changes: 6 additions & 1 deletion llamafile/server/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@
#include "signals.h"
#include "time.h"
#include "tokenbucket.h"
#include "utils.h"

Server* g_server;
llama_model* g_model;
ctl::string g_url_prefix;

int
main(int argc, char* argv[])
Expand All @@ -58,6 +60,9 @@ main(int argc, char* argv[])
LoadZipArgs(&argc, &argv);
llamafile_get_flags(argc, argv);

// normalize URL prefix
g_url_prefix = normalize_url_prefix(FLAG_url_prefix);

// initialize subsystems
time_init();
tokenbucket_init();
Expand Down Expand Up @@ -125,4 +130,4 @@ main(int argc, char* argv[])
while (!pthread_orphan_np())
pthread_decimate_np();
CheckForMemoryLeaks();
}
}
2 changes: 2 additions & 0 deletions llamafile/server/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <atomic>
#include <cosmo.h>
#include <pthread.h>
#include <ctl/string.h>

struct Server
{
Expand Down Expand Up @@ -46,6 +47,7 @@ struct Server
};

extern Server* g_server;
extern ctl::string g_url_prefix;

int
create_listening_socket(const char*);
19 changes: 19 additions & 0 deletions llamafile/server/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#pragma once
#include <ctl/optional.h>
#include <ctl/string.h>
#include <ctl/string_view.h>

extern const signed char kHexToInt[256];
Expand All @@ -31,3 +32,21 @@ or_empty(ctl::optional<ctl::string_view> x)
return x.value();
return {};
}

/**
 * Canonicalizes a user-supplied URL prefix for route matching.
 *
 * The result is either empty (the API is served from the root) or has
 * the form "/a/b": exactly one leading slash, no trailing slash, and
 * no runs of consecutive slashes.
 *
 *     ""              ->  ""
 *     "/"             ->  ""
 *     "//"            ->  ""
 *     "/llamafiler/"  ->  "/llamafiler"
 *     "llamafiler"    ->  "/llamafiler"
 *     "//a///b//"     ->  "/a/b"
 *
 * @param url_prefix prefix as supplied by the user
 * @return normalized prefix that can be compared against the start of
 *     a request path or appended to "http://host:port"
 */
static inline ctl::string normalize_url_prefix(ctl::string url_prefix) {
    // Rule 1: Replace multiple slashes with single slash. Each pass
    // shortens the string by one, so "///" collapses in two passes.
    for (size_t pos; (pos = url_prefix.find("//")) != ctl::string::npos;)
        url_prefix.replace(pos, 2, "/");

    // Rule 2: Remove trailing slash. This also maps a lone "/" to the
    // empty string, so the root prefix needs no separate case.
    if (!url_prefix.empty() && url_prefix.back() == '/')
        url_prefix.pop_back();

    // Rule 3: Ensure a leading slash. Request paths always begin with
    // '/', so a prefix such as "llamafiler" could otherwise never match.
    if (!url_prefix.empty() && url_prefix[0] != '/')
        url_prefix.insert(0, "/");

    return url_prefix;
}

/**
 * Overload for nullable C strings such as an unset command line flag.
 *
 * A null pointer is treated as "no prefix" rather than being handed to
 * the string constructor.
 *
 * @param url_prefix NUL-terminated prefix, or nullptr if not configured
 * @return normalized prefix; empty when url_prefix is nullptr
 */
static inline ctl::string normalize_url_prefix(const char* url_prefix) {
    // Construct the string explicitly so this resolves to the
    // ctl::string overload instead of recursing into this one.
    return normalize_url_prefix(ctl::string(url_prefix ? url_prefix : ""));
}