diff --git a/llamafile/flags.cpp b/llamafile/flags.cpp
index f06bc69465..a65af6f7f7 100644
--- a/llamafile/flags.cpp
+++ b/llamafile/flags.cpp
@@ -47,6 +47,7 @@ bool FLAG_unsecure = false;
 const char *FLAG_file = nullptr;
 const char *FLAG_ip_header = nullptr;
 const char *FLAG_listen = "0.0.0.0:8080";
+const char *FLAG_url_prefix = ""; // never null: the server converts it to a ctl::string
 const char *FLAG_model = nullptr;
 const char *FLAG_prompt = nullptr;
 double FLAG_token_rate = 1;
@@ -138,6 +139,13 @@ void llamafile_get_flags(int argc, char **argv) {
             continue;
         }
 
+        if (!strcmp(flag, "--url-prefix")) {
+            if (i == argc)
+                missing("--url-prefix");
+            FLAG_url_prefix = argv[i++];
+            continue;
+        }
+
         if (!strcmp(flag, "-k") || !strcmp(flag, "--keepalive")) {
             if (i == argc)
                 missing("--keepalive");
diff --git a/llamafile/llamafile.h b/llamafile/llamafile.h
index e63d79c5c6..dc817bd425 100644
--- a/llamafile/llamafile.h
+++ b/llamafile/llamafile.h
@@ -23,6 +23,7 @@ extern bool FLAG_unsecure;
 extern const char *FLAG_file;
 extern const char *FLAG_ip_header;
 extern const char *FLAG_listen;
+extern const char *FLAG_url_prefix;
 extern const char *FLAG_model;
 extern const char *FLAG_prompt;
 extern double FLAG_token_rate;
diff --git a/llamafile/server/client.cpp b/llamafile/server/client.cpp
index 50fc630e4a..6c3caad81c 100644
--- a/llamafile/server/client.cpp
+++ b/llamafile/server/client.cpp
@@ -583,13 +583,32 @@ Client::dispatch()
 bool
 Client::dispatcher()
 {
-    if (path() == "/tokenize")
+    // Match routes against the path with the configured prefix removed.
+    ctl::string_view route = path();
+    const size_t skip = g_url_prefix.size();
+    if (skip) {
+        if (FLAG_verbose >= 2)
+            SLOG("request path %.*s", (int)route.size(), route.data());
+        // Anything outside the prefix is not ours to serve.
+        const bool under_prefix =
+          route.size() >= skip &&
+          !memcmp(route.data(), g_url_prefix.c_str(), skip);
+        if (!under_prefix) {
+            SLOG("path prefix mismatch");
+            return send_error(404);
+        }
+
+        // Narrow the view so the comparisons below see only the suffix.
+        route = ctl::string_view(route.data() + skip, route.size() - skip);
+    }
+
+    if (route == "/tokenize")
         return tokenize();
-    if (path() == "/embedding")
+    if (route == "/embedding")
         return embedding();
-    if (path() == "/v1/embeddings")
+    if (route == "/v1/embeddings")
         return embedding();
-    if (path() == "/completion")
+    if (route == "/completion")
         return completion();
     return send_error(404);
 }
diff --git a/llamafile/server/listen.cpp b/llamafile/server/listen.cpp
index 4f22b1f49e..86b8df0015 100644
--- a/llamafile/server/listen.cpp
+++ b/llamafile/server/listen.cpp
@@ -26,16 +26,14 @@
 #include <sys/socket.h>
 
 #include "log.h"
+#include "server.h"
 
 void
 print_listening_url(unsigned ip, int port)
 {
-    SLOG("listen http://%hhu.%hhu.%hhu.%hhu:%hu",
-         ip >> 24,
-         ip >> 16,
-         ip >> 8,
-         ip,
-         port);
+    SLOG("listen http://%hhu.%hhu.%hhu.%hhu:%hu%s", // dotted quad, port, prefix
+         ip >> 24, ip >> 16, ip >> 8, ip, port, // %hhu keeps the low byte of each
+         g_url_prefix.c_str()); // appended directly after the port via %s
 }
 
 int
diff --git a/llamafile/server/main.1 b/llamafile/server/main.1
index f711a3a063..072a118eb6 100644
--- a/llamafile/server/main.1
+++ b/llamafile/server/main.1
@@ -33,6 +33,11 @@ Specifies the local [HOST:]PORT on which the HTTP server should listen.
 By default this is 0.0.0.0:8080 which means llamafiler will bind to port
 8080 on every locally available IPv4 network interface. This option may
 currently only be specified once.
+.It Fl Fl url-prefix Ar URLPREFIX
+Specifies a URL prefix (subdirectory) under which the HTTP server will
+make the API accessible, e.g. /llamafiler. Useful when running llamafiler
+behind a reverse proxy such as NGINX or Redbean. By default, this is set
+to / (root).
 .It Fl w Ar N , Fl Fl workers Ar N
 Number of HTTP client handling threads.
 .It Fl Fl trust Ar CIDR
diff --git a/llamafile/server/main.1.asc b/llamafile/server/main.1.asc
index a9d441677e..1916f3d7c6 100644
--- a/llamafile/server/main.1.asc
+++ b/llamafile/server/main.1.asc
@@ -35,6 +35,12 @@
                will  bind to port 8080 on every locally available IPv4 network
                interface. This option may currently only be specified once.
 
+       [1m--url-prefix [4m[22mURLPREFIX[0m
+               Specifies a URL prefix  (subdirectory)  under  which  the  HTTP
+               server  will make  the API accessible, e.g. /llamafiler. Useful
+               when running llamafiler behind a reverse proxy such as NGINX or
+               Redbean. By default, this is set to / (root).
+
        [1m-w [4m[22mN[24m, [1m--workers [4m[22mN[0m
                Number of HTTP client handling threads.
 
diff --git a/llamafile/server/main.cpp b/llamafile/server/main.cpp
index e18410531d..080b675fce 100644
--- a/llamafile/server/main.cpp
+++ b/llamafile/server/main.cpp
@@ -29,9 +29,11 @@
 #include "signals.h"
 #include "time.h"
 #include "tokenbucket.h"
+#include "utils.h"
 
 Server* g_server;
 llama_model* g_model;
+ctl::string g_url_prefix;
 
 int
 main(int argc, char* argv[])
@@ -58,6 +60,9 @@ main(int argc, char* argv[])
     LoadZipArgs(&argc, &argv);
     llamafile_get_flags(argc, argv);
 
+    // normalize URL prefix
+    g_url_prefix = normalize_url_prefix(FLAG_url_prefix);
+
     // initialize subsystems
     time_init();
     tokenbucket_init();
@@ -125,4 +130,4 @@ main(int argc, char* argv[])
     while (!pthread_orphan_np())
         pthread_decimate_np();
     CheckForMemoryLeaks();
-}
+}
\ No newline at end of file
diff --git a/llamafile/server/server.h b/llamafile/server/server.h
index e9b09ff809..b5fe4a4d06 100644
--- a/llamafile/server/server.h
+++ b/llamafile/server/server.h
@@ -19,6 +19,7 @@
 #include <atomic>
 #include <cosmo.h>
 #include <pthread.h>
+#include <ctl/string.h>
 
 struct Server
 {
@@ -46,6 +47,7 @@ struct Server
 };
 
 extern Server* g_server;
+extern ctl::string g_url_prefix;
 
 int
 create_listening_socket(const char*);
diff --git a/llamafile/server/utils.h b/llamafile/server/utils.h
index 6daeb7134a..4435138da4 100644
--- a/llamafile/server/utils.h
+++ b/llamafile/server/utils.h
@@ -17,6 +17,7 @@
 
 #pragma once
 #include <ctl/optional.h>
+#include <ctl/string.h>
 #include <ctl/string_view.h>
 
 extern const signed char kHexToInt[256];
@@ -31,3 +32,21 @@ or_empty(ctl::optional<ctl::string_view> x)
         return x.value();
     return {};
 }
+
+// Canonicalizes a URL prefix: single slashes, one leading slash, no trailing
+// slash. Both "" and "/" yield "" (no prefix); "api/" and "//api" yield "/api".
+static inline ctl::string normalize_url_prefix(ctl::string url_prefix) {
+    // Collapse every run of slashes into a single slash.
+    for (size_t pos; (pos = url_prefix.find("//")) != ctl::string::npos;)
+        url_prefix.replace(pos, 2, "/");
+
+    // Drop the trailing slash; this also reduces a bare "/" to "".
+    if (!url_prefix.empty() && url_prefix.back() == '/')
+        url_prefix.pop_back();
+
+    // Request paths always start with '/', so a prefix lacking one could
+    // never match the comparison in Client::dispatcher().
+    if (!url_prefix.empty() && url_prefix.front() != '/')
+        url_prefix.insert(0, "/");
+    return url_prefix;
+}
\ No newline at end of file