-
Notifications
You must be signed in to change notification settings - Fork 1
/
gguf.sh
879 lines (779 loc) · 28 KB
/
gguf.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
#!/bin/bash
# 🦙 Welcome to the GGUF (Groovy GGML Utility Functions) script! 🚀
#
# Prerequisites (because even llamas need tools):
# - llama-server command (macOS: brew install llama.cpp)
# - huggingface-cli command (macOS: brew install huggingface-cli)
# - sqlite3 (usually pre-installed on macOS, like a built-in llama pouch)
# - jq (macOS: brew install jq) - because parsing JSON without jq is like trying to shear a llama with scissors!
#
# This script is your friendly neighborhood llama wrangler! 🤠
# It manages and interacts with large language models using llama.cpp,
# providing a whole petting zoo of functionality:
# - Download models (like adopting new llamas)
# - Run models (let your llamas roam free in the digital pasture)
# - Chat with models (have a heart-to-heart with your favorite llama)
# - Manage a local database of model information (keep track of your llama herd)
#
# So saddle up, partner! Let's wrangle some AI llamas! 🤠🦙
# Configuration Variables
# Every setting may be overridden from the environment (the same pattern
# API_URL already used); the defaults are unchanged. This lets the script
# run on machines where llama.cpp is not under /opt/homebrew/bin.
MODELS_DIR="${MODELS_DIR:-$HOME/.cache/gguf/models/}"
LLAMA_SERVER="${LLAMA_SERVER:-/opt/homebrew/bin/llama-server}"
LLAMA_CLI="${LLAMA_CLI:-/opt/homebrew/bin/llama-cli}"
DB_PATH="${DB_PATH:-$HOME/.cache/gguf/gguf.db}"
DEFAULT_PORT="${DEFAULT_PORT:-1966}"
API_URL="${API_URL:-http://localhost:$DEFAULT_PORT}"
# Model parameters (sent with every completion request)
TEMPERATURE="${TEMPERATURE:-0.7}"
TOP_K="${TOP_K:-40}"
TOP_P="${TOP_P:-0.5}"
N_PREDICT="${N_PREDICT:-256}"
# Helper Functions
# Print $1 with leading and trailing whitespace removed (no trailing newline).
# Enables extglob for the +(...) patterns; the option stays enabled afterwards.
trim() {
  shopt -s extglob
  local text="$1"
  text="${text##+([[:space:]])}"
  text="${text%%+([[:space:]])}"
  printf "%s" "$text"
}
# Print $1 with trailing whitespace removed; leading whitespace is kept.
# Enables extglob for the +(...) pattern; the option stays enabled afterwards.
trim_trailing() {
  shopt -s extglob
  local text="$1"
  text="${text%%+([[:space:]])}"
  printf "%s" "$text"
}
# Filter stdin to stdout, dropping every line that is empty or contains
# only whitespace (i.e. keep only lines with at least one non-space char).
trim_blank_lines() {
  sed -n '/[^[:space:]]/p'
}
# POST $1 to ${API_URL}/tokenize and print each element of the response's
# .tokens array on its own line.
tokenize() {
  local payload
  # jq builds the JSON body so the text is escaped correctly.
  payload="$(jq -ns --arg content "$1" '{content:$content}')"
  local -a request=(
    --silent
    --request POST
    --url "${API_URL}/tokenize"
    --header "Content-Type: application/json"
    --data-raw "$payload"
  )
  curl "${request[@]}" | jq '.tokens[]'
}
# Make sure a llama-server process for the model identified by slug $1 is up.
#
# Looks up the model path (returns 1 if the slug is unknown), records the
# usage time, and, unless a llama-server whose command line mentions that
# path already exists, launches $LLAMA_SERVER in the background on
# $DEFAULT_PORT and waits for it via wait_for_server.
#
# Globals:   DB_PATH, LLAMA_SERVER, DEFAULT_PORT (read)
# Arguments: $1 - model slug
# Returns:   0 if the server is (now) running, non-zero otherwise
ensure_server_running() {
  local slug="$1"
  local model_path
  model_path=$(get_model_path "$slug") || return 1
  update_last_used "$slug"

  if pgrep -f "llama-server.*$model_path" > /dev/null; then
    log_info "Server for model $slug is already running."
    return 0
  fi

  log_info "Starting server for model $slug..."
  # log_file and server_pid keep these names: wait_for_server reads them
  # from this function's scope.
  local log_file="/tmp/llama_server_${slug}.log"

  # Double single quotes so the slug is a valid SQL string literal.
  local sq="'"
  local all_model_paths
  all_model_paths=$(sqlite3 "$DB_PATH" "SELECT file_path FROM models WHERE slug='${slug//$sq/$sq$sq}';")

  # file_path may hold several ';'-separated paths; each becomes its own -m.
  local -a model_paths=() server_args=()
  local path
  IFS=';' read -ra model_paths <<< "$all_model_paths"
  for path in "${model_paths[@]}"; do
    server_args+=(-m "$path")
  done

  # Run the binary directly with an argument array instead of building a
  # string for `bash -c`: paths containing spaces, quotes or `$` are passed
  # through verbatim rather than being re-interpreted by a second shell.
  nohup "$LLAMA_SERVER" "${server_args[@]}" --port "$DEFAULT_PORT" > "$log_file" 2>&1 &
  local server_pid=$!
  log_info "Server started with PID $server_pid. Logs: $log_file"
  wait_for_server
}
# Block until something accepts TCP connections on localhost:$DEFAULT_PORT.
#
# Polls once per second with `nc -z` for up to 300 seconds, printing a dot
# every 10 seconds. If a server PID is known and that process has exited,
# gives up immediately instead of waiting out the full timeout.
#
# Globals:   DEFAULT_PORT (read)
# Arguments: $1 - (optional) PID of the server process; defaults to the
#                 caller's $server_pid if set
#            $2 - (optional) log file named in error messages; defaults to
#                 the caller's $log_file if set
# Returns:   0 once the port is reachable, 1 on timeout or server exit
wait_for_server() {
  local pid="${1:-${server_pid:-}}"
  local log="${2:-${log_file:-}}"
  local wait_time=0
  local max_wait_time=300 # 5 minutes

  while ! nc -z localhost "$DEFAULT_PORT"; do
    if (( wait_time >= max_wait_time )); then
      log_error "Server failed to start within $max_wait_time seconds. Check logs: $log"
      return 1
    fi
    # A dead server will never open the port; fail fast.
    if [[ -n "$pid" ]] && ! kill -0 "$pid" 2> /dev/null; then
      log_error "Server process $pid exited before it became ready. Check logs: $log"
      return 1
    fi
    if (( wait_time % 10 == 0 )); then
      echo -n "."
    fi
    sleep 1
    wait_time=$((wait_time + 1))
  done
  echo "" # New line after dots
  log_info "Server is ready after $wait_time seconds."
}
api_request() {
local method="$1" # GET or POST
local endpoint="$2" # API endpoint
local data="$3" # JSON data for POST requests
curl -s -w "\n%{http_code}" -X "$method" "http://localhost:$DEFAULT_PORT/$endpoint" \
-H "Content-Type: application/json" ${data:+-d "$data"}
}
# Split an api_request result into body and status code, then run one of
# two caller-supplied command strings.
#
# Arguments: $1 - response text whose last line is the HTTP status code
#            $2 - command string evaluated when the status is 200
#            $3 - command string evaluated otherwise
# The command strings may reference $content (the body) and $status_code.
# Returns:   the exit status of the evaluated command string
handle_response() {
  local response="$1"
  local success_action="$2" # Command to execute on success
  local failure_action="$3" # Command to execute on failure
  local status_code content
  status_code=$(printf '%s\n' "$response" | tail -n1)
  content=$(printf '%s\n' "$response" | sed '$d')
  # String comparison: an empty or non-numeric status (e.g. curl produced
  # no output) simply counts as failure instead of making `[ -eq ]` print
  # "integer expression expected".
  # NOTE(review): eval executes the action strings as shell code; callers
  # must never build them from untrusted input.
  if [[ "$status_code" == "200" ]]; then
    eval "$success_action"
  else
    eval "$failure_action"
  fi
}
# Initialize the database if it doesn't exist
# Create the directories holding the database and the models, then create
# the `models` table if it does not exist yet.
# Globals: DB_PATH, MODELS_DIR (read)
init_database() {
  local db_dir
  db_dir="$(dirname "$DB_PATH")"
  mkdir -p "$db_dir"
  mkdir -p "$MODELS_DIR"

  local schema="CREATE TABLE IF NOT EXISTS models (
id INTEGER PRIMARY KEY,
slug TEXT UNIQUE,
model_id TEXT,
file_name TEXT,
file_path TEXT,
file_size TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
last_used DATETIME
);"
  # Feed the schema on stdin, as a here-document would.
  sqlite3 "$DB_PATH" <<< "$schema"
}
# Utility Functions
# Colored log helpers. Each joins its arguments with spaces and prints them
# after a colored level tag.
# - log_info writes to stdout.
# - log_warn / log_error write to stderr so diagnostics remain visible when
#   a caller captures a function's stdout with $(...).
# The message is passed through %s, so backslashes in paths or server
# replies are printed literally rather than interpreted as escapes.
log_info() { printf '\033[0;32m[INFO]\033[0m %s\n' "$*"; }
log_warn() { printf '\033[0;33m[WARN]\033[0m %s\n' "$*" >&2; }
log_error() { printf '\033[0;31m[ERROR]\033[0m %s\n' "$*" >&2; }
# Check that $1 looks like a Hugging Face model ID ("author/model-name").
# Returns 0 when it matches; otherwise logs an error and returns 1.
validate_model_id() {
  local candidate="$1"
  local id_pattern='^[a-zA-Z0-9_-]+/[a-zA-Z0-9_.-]+$'
  if [[ "$candidate" =~ $id_pattern ]]; then
    return 0
  fi
  log_error "Invalid Hugging Face model ID. It should be in the form 'author/model-name'."
  return 1
}
# Derive a slug from a model ID or path: keep only the part after the last
# '/', lowercase it, turn every run of characters outside [a-z0-9-] into a
# single '-', and strip one leading and one trailing '-'.
generate_slug() {
  local leaf="${1##*/}"
  echo "$leaf" \
    | tr '[:upper:]' '[:lower:]' \
    | tr -cs 'a-z0-9-' '-' \
    | sed -e 's/^-//' -e 's/-$//'
}
# Insert a model row, or replace the existing row with the same slug.
#
# Globals:   DB_PATH (read)
# Arguments: $1 slug, $2 model_id, $3 file_name, $4 file_path, $5 file_size
add_model_to_db() {
  local slug="$1" model_id="$2" file_name="$3" file_path="$4" file_size="$5"
  # Double every single quote so values such as a path containing "it's"
  # stay inside their SQL string literal.
  local sq="'"
  sqlite3 "$DB_PATH" "INSERT OR REPLACE INTO models (slug, model_id, file_name, file_path, file_size)
VALUES ('${slug//$sq/$sq$sq}', '${model_id//$sq/$sq$sq}', '${file_name//$sq/$sq$sq}', '${file_path//$sq/$sq$sq}', '${file_size//$sq/$sq$sq}');"
}
# Print the file path stored for the model with slug $1.
#
# Globals:   DB_PATH (read)
# Outputs:   the path on stdout; when the stored value holds several
#            ';'-separated paths only the first is printed. Diagnostics
#            (error message and the model list) go to stderr.
# Returns:   0 if found, 1 if no row matches the slug
get_model_path() {
  local slug="$1"
  local sq="'"
  local result
  result=$(sqlite3 "$DB_PATH" "SELECT file_path FROM models WHERE slug='${slug//$sq/$sq$sq}';")
  if [ -z "$result" ]; then
    # Callers capture stdout with $(...); send the error to stderr so it is
    # shown to the user instead of being swallowed into the variable.
    log_error "Model with slug '$slug' not found in the database." >&2
    list_models >&2
    return 1
  fi
  # If there are multiple paths, return the first one
  printf '%s\n' "$result" | cut -d';' -f1
}
# Set last_used to the current timestamp for the model with slug $1.
# Globals: DB_PATH (read)
update_last_used() {
  local slug="$1"
  # Double single quotes so the slug cannot terminate the SQL literal.
  local sq="'"
  sqlite3 "$DB_PATH" "UPDATE models SET last_used=CURRENT_TIMESTAMP WHERE slug='${slug//$sq/$sq$sq}';"
}
# Delete the database row for the model with slug $1.
# Globals: DB_PATH (read)
remove_model_from_db() {
  local slug="$1"
  # Double single quotes so the slug cannot terminate the SQL literal and
  # widen the DELETE.
  local sq="'"
  sqlite3 "$DB_PATH" "DELETE FROM models WHERE slug='${slug//$sq/$sq$sq}';"
}
# Print a table of all models: slug, model id, size and last-used time.
# Most recently used models come first; models never used come last,
# newest first.
#
# Globals:   DB_PATH (read)
# Arguments: $1 - "--help" prints usage instead
list_models() {
  if [ "$1" == "--help" ]; then
    echo "Usage: gguf ls"
    echo "List all models in the database."
    return 0
  fi
  (
    echo "SLUG|MODEL ID|SIZE|LAST USED"
    # The display column has its own alias and ORDER BY names the table
    # column explicitly. Previously the alias was also called last_used, so
    # ORDER BY sorted on the display text and 'Never' ranked above every
    # timestamp.
    sqlite3 -separator "|" "$DB_PATH" "
SELECT
slug,
model_id,
file_size,
COALESCE(datetime(last_used), 'Never') AS last_used_display
FROM models
ORDER BY models.last_used DESC, created_at DESC;"
  ) | column -t -s '|'
}
# Model Management Functions
# Download the q4_k_m GGUF file of a Hugging Face model and register it.
#
# Steps: validate the ID, skip if a .gguf already exists under the model
# directory, query the Hugging Face API for the repo's file list, pick the
# first file whose name ends in "q4_k_m.gguf" (case-insensitive), download
# it with huggingface-cli, then add it to the database.
#
# Globals:   MODELS_DIR (read)
# Arguments: $1 - model ID ("author/model-name"), or "--help"
# Returns:   0 on success or when the model is already present,
#            1 on any validation, lookup or download failure
pull_model() {
  local model_id="$1"
  if [ "$model_id" == "--help" ]; then
    echo "Usage: gguf pull <model_id>"
    echo "Download a new model from Hugging Face."
    echo
    echo "Arguments:"
    echo " <model_id> The Hugging Face model ID (e.g., 'author/model-name')"
    return 0
  fi
  if ! validate_model_id "$model_id"; then return 1; fi

  local model_dir="$MODELS_DIR/$model_id"
  log_info "Checking for existing files in $model_dir..."
  if [ -d "$model_dir" ] && find "$model_dir" -name "*.gguf" -print -quit | grep -q .; then
    log_warn "Model already exists in $model_dir. Remove existing files to re-download."
    return 0
  fi

  log_info "Fetching model information for $model_id..."
  local api_url="https://huggingface.co/api/models/$model_id?filter=gguf&sort=lastModified"
  local model_info
  if ! model_info=$(curl -s "$api_url") || [ -z "$model_info" ]; then
    log_error "Failed to fetch model information for $model_id"
    return 1
  fi

  # `.siblings[]?` keeps jq quiet when the API answers with an error object
  # that has no siblings list; the empty result is reported just below.
  local file_to_download
  file_to_download=$(printf '%s\n' "$model_info" \
    | jq -r '.siblings[]? | select(.rfilename | ascii_downcase | endswith("q4_k_m.gguf")) | .rfilename' \
    | head -n 1)
  if [ -z "$file_to_download" ]; then
    log_error "No q4_k_m.gguf file found for $model_id"
    return 1
  fi

  log_info "Downloading $file_to_download for model $model_id..."
  if ! mkdir -p "$model_dir"; then
    log_error "Failed to create directory $model_dir"
    return 1
  fi

  local downloaded_file="$model_dir/$file_to_download"
  # Fail if the CLI reports an error or the file did not materialize.
  if ! huggingface-cli download "$model_id" "$file_to_download" --local-dir "$model_dir" \
    || [ ! -f "$downloaded_file" ]; then
    log_error "Failed to download $file_to_download from $model_id"
    log_info "You can try downloading manually and then use 'gguf import' to add it to the database."
    return 1
  fi

  local file_size slug
  file_size=$(du -h "$downloaded_file" | cut -f1)
  slug=$(generate_slug "$model_id")
  add_model_to_db "$slug" "$model_id" "$file_to_download" "$downloaded_file" "$file_size"
  log_info "Model added to database with slug: $slug"
  echo "To use this model, run: gguf chat $slug"
}
# Delete a model's file and its database row.
#
# Arguments: $1 - slug of the model to remove, or "--help"
# Returns:   1 if the slug cannot be resolved to a path
remove_model() {
  local slug="$1"
  case "$slug" in
    --help)
      printf '%s\n' \
        "Usage: gguf rm <slug>" \
        "Remove a model from the filesystem and database." \
        "" \
        "Arguments:" \
        " <slug> The slug of the model to remove"
      return 0
      ;;
  esac

  local target_file
  if ! target_file=$(get_model_path "$slug"); then
    return 1
  fi
  rm -f "$target_file"
  remove_model_from_db "$slug"
  log_info "Model '$slug' removed from filesystem and database."
}
# Scan MODELS_DIR for *.gguf files and add any that are not yet in the
# database (matched by file_path).
#
# For each file the model ID is the file's directory relative to
# MODELS_DIR (e.g. "author/model-name") and the slug comes from
# generate_slug.
#
# Globals:   MODELS_DIR, DB_PATH (read)
# Arguments: $1 - "--help" prints usage instead
import_existing_models() {
  if [ "$1" == "--help" ]; then
    echo "Usage: gguf import [--help]"
    echo "Import existing models from the filesystem into the database."
    echo
    echo "Description:"
    echo " This command scans the MODELS_DIR ($MODELS_DIR) for .gguf files"
    echo " and adds them to the database if they're not already present."
    echo " It's useful for synchronizing the database with manually added models"
    echo " or after moving/copying models to the MODELS_DIR."
    echo
    echo "Process:"
    echo " 1. Scans MODELS_DIR for .gguf files"
    echo " 2. For each file, generates a slug based on the file path"
    echo " 3. Checks if the model is already in the database"
    echo " 4. If not present, adds the model to the database with details like:"
    echo " - Slug (for easy reference)"
    echo " - Model ID (derived from file path)"
    echo " - File name"
    echo " - File path"
    echo " - File size"
    echo
    echo "Options:"
    echo " --help Show this help message"
    return 0
  fi

  log_info "Scanning for existing models in $MODELS_DIR..."
  # MODELS_DIR may end in '/', and find implementations differ on whether
  # they then print "dir//sub" or "dir/sub". Strip the root without its
  # trailing slash and then any leading slashes, so the model ID is correct
  # either way. Plain prefix removal also avoids using the directory name
  # as a sed regex.
  local models_root="${MODELS_DIR%/}"
  local sq="'"
  local file_path file_name rel model_id slug file_size existing
  # The loop runs in a pipeline subshell; nothing it sets is needed after.
  find "$MODELS_DIR" -type f -name "*.gguf" | while IFS= read -r file_path; do
    file_name=$(basename "$file_path")
    rel="${file_path#"$models_root"}"
    while [[ "$rel" == /* ]]; do
      rel="${rel#/}"
    done
    model_id="${rel%/*}"
    slug=$(generate_slug "$model_id")
    file_size=$(du -h "$file_path" | cut -f1)
    existing=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM models WHERE file_path='${file_path//$sq/$sq$sq}';")
    if [[ "$existing" == "0" ]]; then
      add_model_to_db "$slug" "$model_id" "$file_name" "$file_path" "$file_size"
      log_info "Imported model: $slug"
    else
      log_warn "Model already in database: $slug"
    fi
  done
  log_info "Import completed."
}
# Delete the database file, recreate the schema and re-import every model
# found on disk.
# Arguments: $1 - "--help" prints usage instead
reset_db() {
  case "$1" in
    --help)
      printf '%s\n' \
        "Usage: gguf reset" \
        "Reset the database and re-import existing models."
      return 0
      ;;
  esac

  log_warn "Resetting the database..."
  rm -f "$DB_PATH"
  init_database
  import_existing_models
  log_info "Database reset and import complete."
}
# Rename a model's slug (the row keeps a single slug; the old one stops
# working).
#
# Globals:   DB_PATH (read)
# Arguments: $1 - current slug (or "--help"), $2 - new slug
# Returns:   1 on missing arguments, unknown old slug, or new slug in use
alias_model() {
  local old_slug="$1" new_slug="$2"
  if [ "$old_slug" == "--help" ]; then
    echo "Usage: gguf alias <old_slug> <new_slug>"
    echo "Create an alias for a model."
    echo
    echo "Arguments:"
    echo " <old_slug> The current slug of the model"
    echo " <new_slug> The new slug to assign to the model"
    return 0
  fi
  if [ -z "$old_slug" ] || [ -z "$new_slug" ]; then
    echo "Usage: gguf alias <old_slug> <new_slug>"
    return 1
  fi

  # Double single quotes so neither slug can break out of its SQL literal.
  local sq="'"
  local old_sql="${old_slug//$sq/$sq$sq}"
  local new_sql="${new_slug//$sq/$sq$sq}"

  local exists new_exists
  exists=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM models WHERE slug='$old_sql';")
  new_exists=$(sqlite3 "$DB_PATH" "SELECT COUNT(*) FROM models WHERE slug='$new_sql';")
  # ${var:-0}: a failed query yields an empty string; treat it as "no rows"
  # rather than letting the numeric test error out.
  if [ "${exists:-0}" -eq 0 ]; then
    log_error "Model with slug '$old_slug' not found."
    return 1
  elif [ "${new_exists:-0}" -ne 0 ]; then
    log_error "Model with slug '$new_slug' already exists."
    return 1
  fi
  sqlite3 "$DB_PATH" "UPDATE models SET slug='$new_sql' WHERE slug='$old_sql';"
  log_info "Model '$old_slug' aliased to '$new_slug'."
}
# Server Management Functions
# Start (or reuse) the server for a model and optionally complete a prompt.
#
# Arguments: $1 - model slug, or "--help"
#            $2... - optional prompt words; joined with spaces
# Globals:   N_PREDICT, TEMPERATURE, TOP_K, TOP_P (read)
# Returns:   1 if the server cannot be started; otherwise the status of
#            the completion request handling (0 when no prompt is given)
run_model() {
  local slug="$1"
  shift
  if [ "$slug" == "--help" ]; then
    echo "Usage: gguf run <slug> [text]"
    echo "Run a model server and optionally complete text."
    echo
    echo "Arguments:"
    echo " <slug> The slug of the model to run"
    echo " [text] Optional text for completion"
    return 0
  fi
  ensure_server_running "$slug" || return 1
  if [ $# -eq 0 ]; then
    return 0
  fi

  local prompt="$*"
  log_info "Completing text: $prompt"

  # Draw a horizontal rule as wide as the terminal. Fall back to 80 columns
  # when tput cannot tell (no TERM, not a tty). The rule is built with a
  # shell substitution because a byte-oriented `tr` would emit only part of
  # the multibyte '─' character.
  local term_width rule
  term_width=$(tput cols 2> /dev/null)
  [[ "$term_width" =~ ^[0-9]+$ ]] || term_width=80
  printf -v rule '%*s' "$term_width" ''
  printf '%s\n' "${rule// /─}"

  local data
  data=$(jq -n --arg p "$prompt" --arg n "$N_PREDICT" --arg t "$TEMPERATURE" --arg k "$TOP_K" --arg tp "$TOP_P" \
    '{prompt: $p, n_predict: ($n|tonumber), temperature: ($t|tonumber), top_k: ($k|tonumber), top_p: ($tp|tonumber)}')
  local response
  response=$(api_request POST "completion" "$data")
  # The status of handle_response is returned so callers and scripts can
  # detect a failed completion.
  handle_response "$response" \
    "echo \"\$content\" | jq -r '.content' | trim_blank_lines" \
    "log_error \"Failed to complete text. Status code: \$status_code\"; echo \"\$content\""
}
# Terminate a llama-server by PID or by model slug.
#
# A numeric target is treated as a PID. Any other target is first resolved
# to its model file path through the database: the server command line
# holds the path in its original case, while slugs are lowercased, so
# matching the slug text alone usually finds nothing. If that yields no
# process, the raw target text is matched against llama-server command
# lines as before.
#
# Globals:   DB_PATH (read)
# Arguments: $1 - slug, PID, or "--help"
# Returns:   0 if every targeted process was signalled, 1 otherwise
kill_model() {
  local target="$1"
  if [ "$target" == "--help" ]; then
    echo "Usage: gguf kill <slug|pid|all>"
    echo "Kill the server running the specified model, a specific process, or all servers."
    echo
    echo "Arguments:"
    echo " <slug> The slug of the model to kill"
    echo " <pid> The process ID to kill"
    echo " all Kill all llama-server processes"
    echo
    echo "Use 'gguf ps' to see running models and their PIDs."
    return 0
  fi
  if [ -z "$target" ]; then
    log_error "No target provided. Usage: gguf kill <slug|pid|all>"
    log_info "Use 'gguf ps' to see running models and their PIDs."
    return 1
  fi

  if [[ "$target" =~ ^[0-9]+$ ]]; then
    # If target is a numeric PID
    if kill "$target" 2> /dev/null; then
      log_info "Process with PID $target terminated."
      return 0
    fi
    log_error "Failed to terminate process with PID $target."
    return 1
  fi

  # If target is a slug
  local sq="'"
  local model_path pids="" pid status=0
  model_path=$(sqlite3 "$DB_PATH" "SELECT file_path FROM models WHERE slug='${target//$sq/$sq$sq}';" 2> /dev/null \
    | cut -d';' -f1)
  if [ -n "$model_path" ]; then
    pids=$(pgrep -f "llama-server.*$model_path")
  fi
  if [ -z "$pids" ]; then
    pids=$(pgrep -f "llama-server.*$target")
  fi
  if [ -z "$pids" ]; then
    log_warn "No running server found for model '$target'."
    return 1
  fi

  # $pids is a whitespace-separated PID list; word splitting is intended.
  for pid in $pids; do
    if kill "$pid" 2> /dev/null; then
      log_info "Server for model '$target' (PID: $pid) terminated."
    else
      log_error "Failed to terminate server for model '$target' (PID: $pid)."
      status=1
    fi
  done
  return "$status"
}
# Terminate every process whose command line matches "llama-server":
# send the default signal first, wait two seconds, then SIGKILL whatever
# is still found.
kill_all_servers() {
  local running
  running=$(pgrep -f "llama-server")
  if [ -z "$running" ]; then
    log_warn "No running llama-server processes found."
    return 0
  fi

  log_info "Killing all llama-server processes..."
  # Unquoted on purpose: the PID list must split into separate arguments.
  kill $running
  sleep 2

  # Force kill if any remain
  local survivors
  survivors=$(pgrep -f "llama-server")
  if [ -n "$survivors" ]; then
    kill -9 $survivors
  fi
  log_info "All llama-server processes terminated."
}
# Print a PID / SLUG / MODEL table of running llama-server processes.
#
# Each matching `ps aux` line is parsed for its PID and for the file given
# after "-m "; the slug is looked up in the database by file name and falls
# back to "unknown". The rows are aligned with `column`.
#
# Globals:   DB_PATH (read)
# Arguments: $1 - "--help" prints usage instead
show_processes() {
if [ "$1" == "--help" ]; then
echo "Usage: gguf ps"
echo "Show running llama-server processes."
return 0
fi
# Header and rows are produced in one subshell so `column` aligns them together.
(
echo "PID|SLUG|MODEL"
# The [l] bracket keeps grep from matching its own command line.
ps aux | grep "[l]lama-server" | while read -r line; do
# Field 2 of `ps aux` output is the PID.
pid=$(echo "$line" | awk '{print $2}')
# Fields 11..NF are re-joined with single spaces to form the command line.
cmd=$(echo "$line" | awk '{for(i=11;i<=NF;i++) printf "%s ", $i}')
# Take the first whitespace-delimited token after "-m " and reduce it to its
# base name. NOTE(review): a model path containing spaces would be cut
# short here - confirm whether such paths need to be supported.
model_file=$(echo "$cmd" | awk -F '-m ' '{print $2}' | awk '{print $1}' | xargs basename)
if [ -n "$model_file" ]; then
# Display name: the file name without its .gguf suffix.
model=${model_file%.gguf}
slug=$(sqlite3 "$DB_PATH" "SELECT slug FROM models WHERE file_name='$model_file' LIMIT 1;")
if [ -z "$slug" ]; then
slug="unknown"
fi
else
model="Unknown"
slug="unknown"
fi
echo "$pid|$slug|$model"
done
) | column -t -s '|'
}
# Model Operations Functions
# Print a standard help block for a sub-command.
#
# Arguments: $1 - command name
#            $2 - one-line description
#            $3 - argument text; shown on the usage line and again under
#                 "Arguments:"
print_help() {
  local command="$1" description="$2" args="$3"
  printf '%s\n' \
    "Usage: gguf $command $args" \
    "$description" \
    "" \
    "Arguments:" \
    "$args"
}
# Run one API operation against a model's server and print the result.
#
# Ensures the server for the slug is running, pauses two seconds, POSTs the
# JSON body to the endpoint and hands the reply to handle_response, which
# pretty-prints it on success or logs the status code on failure.
#
# Arguments: $1 - model slug
#            $2 - endpoint name (e.g. "completion")
#            $3 - JSON request body
# Returns:   1 if the server cannot be started
run_model_operation() {
  local slug="$1" operation="$2" data="$3"

  if ! ensure_server_running "$slug"; then
    return 1
  fi
  sleep 2

  # Command strings evaluated later by handle_response; $content and
  # $status_code are deliberately left unexpanded here.
  local on_success='echo "$content" | jq '"'.'"' | trim_blank_lines'
  local on_failure="log_error \"Failed to perform $operation. Status code: \$status_code\"; echo \"\$content\""

  local response
  response=$(api_request POST "$operation" "$data")
  handle_response "$response" "$on_success" "$on_failure"
}
# Run an interactive chat loop against a model.
#
# Reads lines from stdin with the prompt "User: " and passes each one to
# chat_completion. The loop ends on the word "exit" or on end of input
# (Ctrl-D or a closed pipe). Blank lines are ignored.
#
# CHAT is declared here and is therefore local to this function;
# chat_completion and format_prompt reach it through the call stack to
# read and extend the conversation history.
#
# Arguments: $1 - model slug, or "--help"
# Returns:   1 if the server cannot be started
chat_model() {
  local slug="$1"
  shift
  if [ "$slug" == "--help" ]; then
    print_help "chat" "Start an interactive chat session with the specified model." "<slug> The slug of the model to chat with"
    return 0
  fi
  ensure_server_running "$slug" || return 1
  log_info "Starting chat session. Type 'exit' to end."
  declare -a CHAT=()
  local user_input
  # `read` fails at EOF. Without stopping there the loop would spin
  # forever, sending empty prompts. The `|| [[ -n ... ]]` part still
  # processes a final line that lacks a trailing newline.
  while read -r -e -p "User: " user_input || [[ -n "$user_input" ]]; do
    [[ "$user_input" == "exit" ]] && break
    [[ -z "$user_input" ]] && continue
    chat_completion "$user_input"
  done
  log_info "Chat session ended."
}
# Send one chat turn to ${API_URL}/completion and stream the answer to stdout.
#
# The prompt is the output of format_prompt for $1 with trailing whitespace
# removed. The request enables streaming, prompt caching and a stop sequence
# of "\n### Human:". On success the user message and the trimmed answer are
# appended to the CHAT array (declared by chat_model in this file).
#
# Globals:   API_URL, TEMPERATURE, TOP_K, TOP_P, N_PREDICT (read); CHAT (appended)
# Arguments: $1 - the user's message
# Returns:   1 if the server sends an "error:" line or no answer text arrives
chat_completion() {
local prompt="$(trim_trailing "$(format_prompt "$1")")"
# jq -Rs reads the whole prompt from stdin as one raw string (the `.` below),
# so it is JSON-escaped; the numeric settings arrive as strings and are
# converted with tonumber.
local data="$(echo -n "$prompt" | jq -Rs --arg temp "$TEMPERATURE" --arg topk "$TOP_K" --arg topp "$TOP_P" --arg np "$N_PREDICT" '{
prompt: .,
temperature: ($temp | tonumber),
top_k: ($topk | tonumber),
top_p: ($topp | tonumber),
n_predict: ($np | tonumber),
cache_prompt: true,
stop: ["\n### Human:"],
stream: true
}')"
local answer=''
# Debug log with a fixed path; the `>` below truncates it on every call.
local debug_output="/tmp/chat_completion_debug.log"
echo "Sending request with data:" > "$debug_output"
echo "$data" >> "$debug_output"
# Read the streamed response line by line as curl delivers it.
while IFS= read -r line; do
echo "Received line: $line" >> "$debug_output"
if [[ $line = data:* ]]; then
# ${line:5} drops the 5-character "data:" prefix, leaving the JSON chunk.
local content="$(echo "${line:5}" | jq -r '.content')"
printf "%s" "${content}"
answer+="${content}"
elif [[ $line = error:* ]]; then
# ${line:6} drops the 6-character "error:" prefix.
echo "Error: ${line:6}" >&2
return 1
fi
# Process substitution keeps the loop in the current shell, so `answer`
# is still set after the loop; curl's stderr goes to the debug log.
done < <(curl \
--silent \
--no-buffer \
--request POST \
--url "${API_URL}/completion" \
--header "Content-Type: application/json" \
--data-raw "${data}" 2>> "$debug_output")
if [ -z "$answer" ]; then
echo "No response received from the server. Check $debug_output for details." >&2
return 1
fi
printf "\n"
# Update chat history
CHAT+=("$1" "$(trim "$answer")")
}
# Print the full chat prompt: the fixed system instruction, then one
# "### Human: … / ### Assistant: …" pair per past exchange stored in CHAT,
# then the new user message $1 with an empty assistant slot.
# Nothing is printed after the final "### Assistant: ".
format_prompt() {
  local instruction="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."
  printf '%s' "$instruction"

  # History followed by the new message, consumed two entries at a time.
  local -a turns=("${CHAT[@]}" "$1")
  local i
  for ((i = 0; i < ${#turns[@]}; i += 2)); do
    printf "\n### Human: %s\n### Assistant: %s" "${turns[i]}" "${turns[i + 1]}"
  done
}
# Request a text completion from a model and print the JSON reply.
#
# Arguments: $1 - model slug, or "--help"
#            $2... - prompt words; joined with spaces
# Globals:   N_PREDICT, TEMPERATURE, TOP_K, TOP_P (read)
complete_model() {
  local slug="$1"
  shift
  case "$slug" in
    --help)
      print_help "complete" "Generate text completion for the given prompt." "<slug> The slug of the model to use
<prompt> The prompt for text completion"
      return 0
      ;;
  esac

  local prompt="$*"
  # jq assembles the request body so the prompt is escaped properly; the
  # sampling settings are passed as strings and converted with tonumber.
  local body_template='{prompt: $p, n_predict: ($n|tonumber), temperature: ($t|tonumber), top_k: ($k|tonumber), top_p: ($tp|tonumber)}'
  local data
  data=$(jq -n \
    --arg p "$prompt" \
    --arg n "$N_PREDICT" \
    --arg t "$TEMPERATURE" \
    --arg k "$TOP_K" \
    --arg tp "$TOP_P" \
    "$body_template")
  run_model_operation "$slug" "completion" "$data"
}
# Request embeddings for a text from a model and print the JSON reply.
#
# Arguments: $1 - model slug, or "--help"
#            $2 - text to embed
# Returns:   1 if the request body cannot be built
embed_model() {
  local slug="$1" text="$2"
  if [ "$slug" == "--help" ]; then
    print_help "embed" "Generate embeddings for the given text using the specified model." "<model_slug> The slug of the model to use for embedding
<text> The text to generate embeddings for"
    echo
    echo "Example:"
    echo " gguf embed chat \"Hello, world!\""
    return 0
  fi
  # Let jq build the body: splicing the text into a JSON string by hand
  # produces invalid JSON as soon as it contains a quote, backslash or
  # newline.
  local data
  data=$(jq -n --arg content "$text" '{content: $content}') || return 1
  run_model_operation "$slug" "embedding" "$data"
}
# Tokenize a text with a model's tokenizer and print the JSON reply.
#
# Arguments: $1 - model slug, or "--help"
#            $2 - text to tokenize ("--help" here also prints usage)
# Returns:   1 if the request body cannot be built
tokenize_text() {
  local slug="$1" text="$2"
  if [ "$slug" == "--help" ] || [ "$text" == "--help" ]; then
    print_help "tokenize" "Tokenize the given text using the specified model." "<model_slug> The slug of the model to use for tokenization
<text> The text to tokenize"
    return 0
  fi
  # Let jq build the body so quotes, backslashes and newlines in the text
  # are escaped instead of corrupting hand-assembled JSON.
  local data
  data=$(jq -n --arg content "$text" '{content: $content}') || return 1
  run_model_operation "$slug" "tokenize" "$data"
}
# Convert a list of token IDs back to text and print the JSON reply.
#
# Arguments: $1 - model slug, or "--help"
#            $2 - tokens as a JSON array, e.g. "[1, 2, 3]"
# Returns:   1 if $2 is not a valid JSON array
detokenize_text() {
  local slug="$1" tokens="$2"
  if [ "$slug" == "--help" ]; then
    print_help "detokenize" "Detokenize the given tokens using the specified model." "<model_slug> The slug of the model to use for detokenization
<tokens> The tokens to detokenize (as a JSON array)"
    return 0
  fi
  # --argjson parses the value, so malformed or empty input is rejected
  # here instead of being spliced verbatim into the request body.
  local data
  if ! data=$(jq -n --argjson tokens "$tokens" \
    'if ($tokens | type) == "array" then {tokens: $tokens} else error("tokens must be an array") end' \
    2> /dev/null); then
    log_error "Tokens must be a valid JSON array, e.g. '[1, 2, 3]'."
    return 1
  fi
  run_model_operation "$slug" "detokenize" "$data"
}
# Server Information Functions
# Query the local server's /health endpoint and report the result.
#
# Globals:   DEFAULT_PORT (read)
# Arguments: $1 - "--help" prints usage instead
# Returns:   non-zero when the endpoint does not answer with HTTP 200,
#            so the command can be used in scripts
check_health() {
  if [ "$1" == "--help" ]; then
    echo "Usage: gguf health"
    echo "Check the health status of the running server."
    return 0
  fi
  local response status_code body
  # -w appends a newline and the HTTP status code after the body.
  response=$(curl -s -w "\n%{http_code}" "http://localhost:$DEFAULT_PORT/health")
  status_code=$(printf '%s\n' "$response" | tail -n1)
  body=$(printf '%s\n' "$response" | sed '$d')
  # String comparison: an empty or non-numeric status counts as unhealthy
  # without tripping a "[: integer expression expected" error.
  if [[ "$status_code" == "200" ]]; then
    log_info "Server is healthy."
    printf '%s\n' "$body" | jq '.'
  else
    log_error "Server not healthy (Status code: $status_code)"
    printf '%s\n' "$body"
    return 1
  fi
}
# Print a table of up to 20 GGUF-tagged models from the Hugging Face model
# list, with their last-modified time, likes and downloads.
# Arguments: $1 - "--help" prints usage instead
get_recent_models() {
  case "$1" in
    --help)
      printf '%s\n' \
        "Usage: gguf recent" \
        "Get the 20 most recent GGUF models from Hugging Face."
      return 0
      ;;
  esac

  local endpoint="https://huggingface.co/api/models?filter=gguf&sort=lastModified"
  # Keep entries tagged "gguf" and emit one tab-separated row per model.
  local row_filter='.[] | select(.tags | contains(["gguf"])) |
[.modelId, .lastModified, (.likes // 0), (.downloads // 0)] |
@tsv'
  {
    printf 'MODEL ID\tLAST MODIFIED\tLIKES\tDOWNLOADS\n'
    curl -s -X GET "$endpoint" | jq -r "$row_filter" | head -n 20
  } | column -t -s $'\t'
}
# Print a table of the top 20 GGUF-tagged models from the Hugging Face
# model list, ordered by likes (descending) and then by downloads
# (descending).
# Arguments: $1 - "--help" prints usage instead
get_trending_models() {
  if [ "$1" == "--help" ]; then
    echo "Usage: gguf trending"
    echo "Get the top 20 trending GGUF models from Hugging Face by a combination of likes and downloads, with likes as the primary sort criteria."
    return 0
  fi
  # Two explicit numeric keys on tab-separated fields: likes (field 3),
  # then downloads (field 4). A single numeric key spanning fields 3-4
  # only ever compares the likes value, so downloads never broke ties.
  curl -s -X GET "https://huggingface.co/api/models?filter=gguf&sort=lastModified" \
    | jq -r '.[] | select(.tags | contains(["gguf"])) |
[.modelId, .lastModified, (.likes // 0), (.downloads // 0)] |
@tsv' \
    | sort -t $'\t' -k3,3nr -k4,4nr \
    | head -n 20 \
    | (echo -e "MODEL ID\tLAST MODIFIED\tLIKES\tDOWNLOADS" && cat) \
    | column -t -s $'\t'
}
# Fetch /props from the local server and pretty-print the JSON.
# Globals:   DEFAULT_PORT (read)
# Arguments: $1 - "--help" prints usage instead
get_server_props() {
  case "$1" in
    --help)
      printf '%s\n' \
        "Usage: gguf props" \
        "Get the properties of the running server."
      return 0
      ;;
  esac
  local props_url="http://localhost:$DEFAULT_PORT/props"
  curl -s "$props_url" | jq '.'
}
# Usage Information
# Print the colored top-level usage screen listing every command group.
print_usage() {
  local NC='\033[0m'
  local CYAN='\033[0;36m'
  local GREEN='\033[0;32m'
  local YELLOW='\033[0;33m'
  local MAGENTA='\033[0;35m'
  local DARK_GRAY='\033[0;90m'
  # One table row: command (padded to 26 columns), dotted leader, summary.
  # printf re-applies the format to each group of three arguments, so one
  # call prints a whole section.
  local row=" ${GREEN}%-26s${NC} ${DARK_GRAY}%s${NC} %s\n"
  local dots="......................."

  echo -e "${CYAN}Usage:${NC} gguf ${GREEN}<command>${NC} [options]"
  echo
  echo -e "${YELLOW}Model Management:${NC}"
  printf "$row" \
    "pull <model_id>" "$dots" "Download a new model" \
    "rm <slug>" "$dots" "Remove a model" \
    "ls" "$dots" "List all models" \
    "alias <old> <new>" "$dots" "Create an alias for a model" \
    "import" "$dots" "Import existing models"
  echo
  echo -e "${YELLOW}Model Operations:${NC}"
  printf "$row" \
    "run <slug> [text]" "$dots" "Run a model server and optionally complete text" \
    "chat <slug>" "$dots" "Start a chat session" \
    "embed <slug> <text>" "$dots" "Generate embeddings" \
    "tokenize <slug> <text>" "$dots" "Tokenize text" \
    "detokenize <slug> <tokens>" "$dots" "Detokenize text"
  echo
  echo -e "${YELLOW}Server Information:${NC}"
  printf "$row" \
    "health" "$dots" "Check server health" \
    "props" "$dots" "Get server properties" \
    "ps" "$dots" "Show running processes" \
    "kill <slug|all>" "$dots" "Kill a model server" \
    "reset" "$dots" "Reset the database" \
    "recent" "$dots" "Get most recent GGUF models" \
    "trending" "$dots" "Get trending GGUF models"
  echo
  echo -e "${YELLOW}Advanced Features:${NC}"
  printf "$row" \
    "lora <subcommand>" "$dots" "Manage LoRA adapters" \
    "metrics" "$dots" "Get server metrics" \
    "slots <subcommand>" "$dots" "Manage server slots"
  echo
  echo -e "${MAGENTA}For more information, use:${NC} gguf ${GREEN}<command> --help${NC}"
}
# Placeholder functions for new commands
# Placeholder for the `lora` command: only reports that it is unimplemented.
lora_command() {
  printf '%s\n' "LoRA management is not implemented yet."
}
# Placeholder for the `metrics` command: only reports that it is unimplemented.
get_metrics() {
  printf '%s\n' "Metrics retrieval is not implemented yet."
}
# Placeholder for the `slots` command: only reports that it is unimplemented.
manage_slots() {
  printf '%s\n' "Slot management is not implemented yet."
}
# Main Script Logic
# Entry point: initialize the database, then dispatch the sub-command.
#
# Arguments: $1 - sub-command name; the remaining arguments are forwarded
#                 to the handler
# Every handler receives the arguments after the sub-command, so
# `gguf <command> --help` works uniformly. An unknown or missing
# sub-command prints the usage screen.
main() {
  # Initialize the database
  init_database

  local cmd="$1"
  if [ $# -gt 0 ]; then
    shift
  fi

  case "$cmd" in
    pull) pull_model "$@" ;;
    rm) remove_model "$@" ;;
    ls) list_models "$@" ;;
    alias) alias_model "$@" ;;
    import) import_existing_models "$@" ;;
    reset) reset_db "$@" ;;
    run) run_model "$@" ;;
    chat) chat_model "$@" ;;
    # complete_model is defined in this file but was never reachable
    # because no case dispatched to it.
    complete) complete_model "$@" ;;
    embed) embed_model "$@" ;;
    tokenize) tokenize_text "$@" ;;
    detokenize) detokenize_text "$@" ;;
    health) check_health "$@" ;;
    props) get_server_props "$@" ;;
    ps) show_processes "$@" ;;
    kill)
      if [ "$1" = "all" ]; then
        kill_all_servers
      else
        kill_model "$@"
      fi
      ;;
    recent) get_recent_models "$@" ;;
    trending) get_trending_models "$@" ;;
    lora) lora_command "$@" ;;
    metrics) get_metrics "$@" ;;
    slots) manage_slots "$@" ;;
    *) print_usage ;;
  esac
}
# Script entry point: forward all command-line arguments to main.
main "$@"