Skip to content

Commit

Permalink
refactor: rpc server
Browse files: browse the repository at this point in the history
Signed-off-by: thxCode <thxcode0824@gmail.com>
  • Loading branch information
thxCode committed Aug 24, 2024
1 parent 004ac1f commit a18401d
Show file tree
Hide file tree
Showing 4 changed files with 704 additions and 30 deletions.
4 changes: 2 additions & 2 deletions llama-box/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2890,8 +2890,8 @@ int main(int argc, char **argv) {
llama_numa_init(params.numa);

if (bparams.rparams.port > 0) {
rpc_server_params &rparams = bparams.rparams;
return rpc_server_start(rparams);
rpcserver_params &rparams = bparams.rparams;
return rpcserver_start(rparams);
}

server_context ctx_server;
Expand Down
2 changes: 1 addition & 1 deletion llama-box/param.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ using json = nlohmann::json;

struct llama_box_params {
gpt_params gparams;
rpc_server_params rparams;
rpcserver_params rparams;

int32_t conn_idle = 60; // connection idle in seconds
int32_t conn_keepalive = 15; // connection keep-alive in seconds
Expand Down
25 changes: 20 additions & 5 deletions llama-box/patches/rpc-server.patch
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,36 @@ index d483cf1a..98486cb3 100644
}
#endif
diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m
index 995f1934..89b932db 100644
index 995f1934..b52c418b 100644
--- a/ggml/src/ggml-metal.m
+++ b/ggml/src/ggml-metal.m
@@ -3300,3 +3300,14 @@ GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void *
@@ -3300,3 +3300,29 @@ GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void *
GGML_UNUSED(params);
GGML_UNUSED(user_data);
}
+
+GGML_CALL void ggml_backend_metal_get_device_memory(size_t *free, size_t *total) {
+ id<MTLDevice> device = ggml_backend_metal_get_device();
+ if (free) {
+ *free = device.recommendedMaxWorkingSetSize;
+ }
+ if (total) {
+#if TARGET_OS_OSX || (TARGET_OS_IOS && __clang_major__ >= 15)
+ if (@available(macOS 10.12, iOS 16.0, *)) {
+ *total = device.recommendedMaxWorkingSetSize;
+ }
+#elif TARGET_OS_OSX
+ if (device.maxTransferRate != 0) {
+ *total = device.maxTransferRate;
+ } else {
+ *total = 0;
+ }
+#endif
+ }
+ if (free) {
+ size_t allocated = device.currentAllocatedSize;
+ if (total && *total > allocated) {
+ *free = *total - allocated;
+ } else {
+ *free = 0;
+ }
+ }
+ ggml_backend_metal_free_device();
+}
Loading

0 comments on commit a18401d

Please sign in to comment.