From 587443b6c554cfa699dc1e542aa7d9dd811b3a6a Mon Sep 17 00:00:00 2001
From: Stefanache Ion
Date: Sun, 8 Sep 2024 10:35:07 +0300
Subject: [PATCH] Add files via upload

---
 python/llamafile_llava/llama.log | 59 ++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 python/llamafile_llava/llama.log

diff --git a/python/llamafile_llava/llama.log b/python/llamafile_llava/llama.log
new file mode 100644
index 000000000..5219818ab
--- /dev/null
+++ b/python/llamafile_llava/llama.log
@@ -0,0 +1,59 @@
+clip_model_load: model name: openai/clip-vit-large-patch14-336
+clip_model_load: description: image encoder for LLaVA
+clip_model_load: GGUF version: 3
+clip_model_load: alignment: 32
+clip_model_load: n_tensors: 377
+clip_model_load: n_kv: 19
+clip_model_load: ftype: q4_0
+clip_model_load: loaded meta data with 19 key-value pairs and 377 tensors from llava-v1.5-7b-mmproj-Q4_0.gguf
+clip_model_load: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+clip_model_load: - kv 0: general.architecture str = clip
+clip_model_load: - kv 1: clip.has_text_encoder bool = false
+clip_model_load: - kv 2: clip.has_vision_encoder bool = true
+clip_model_load: - kv 3: clip.has_llava_projector bool = true
+clip_model_load: - kv 4: general.file_type u32 = 2
+clip_model_load: - kv 5: general.name str = openai/clip-vit-large-patch14-336
+clip_model_load: - kv 6: general.description str = image encoder for LLaVA
+clip_model_load: - kv 7: clip.vision.image_size u32 = 336
+clip_model_load: - kv 8: clip.vision.patch_size u32 = 14
+clip_model_load: - kv 9: clip.vision.embedding_length u32 = 1024
+clip_model_load: - kv 10: clip.vision.feed_forward_length u32 = 4096
+clip_model_load: - kv 11: clip.vision.projection_dim u32 = 768
+clip_model_load: - kv 12: clip.vision.attention.head_count u32 = 16
+clip_model_load: - kv 13: clip.vision.attention.layer_norm_epsilon f32 = 0.000010
+clip_model_load: - kv 14: clip.vision.block_count u32 = 23
+clip_model_load: - kv 15: clip.vision.image_mean arr[f32,3] = [0.481455, 0.457828, 0.408211]
+clip_model_load: - kv 16: clip.vision.image_std arr[f32,3] = [0.268630, 0.261303, 0.275777]
+clip_model_load: - kv 17: clip.use_gelu bool = false
+clip_model_load: - kv 18: general.quantization_version u32 = 2
+clip_model_load: - type f32: 235 tensors
+clip_model_load: - type f16: 1 tensors
+clip_model_load: - type q4_0: 141 tensors
+clip_model_load: CLIP using CPU backend
+clip_model_load: text_encoder: 0
+clip_model_load: vision_encoder: 1
+clip_model_load: llava_projector: 1
+clip_model_load: model size: 169.18 MB
+clip_model_load: metadata size: 0.17 MB
+clip_model_load: params backend buffer size = 169.18 MB (377 tensors)
+get_key_idx: note: key clip.vision.image_grid_pinpoints not found in file
+get_key_idx: note: key clip.vision.mm_patch_merge_type not found in file
+get_key_idx: note: key clip.vision.image_crop_resolution not found in file
+clip_model_load: compute allocated memory: 32.89 MB
+warming up the model with an empty run
+
+llama server listening at http://127.0.0.1:8080
+
+warning: this OS doesn't support pledge() security
+encode_image_with_clip: image embedding created: 576 tokens
+encode_image_with_clip: image encoded in 5062.39 ms by CLIP ( 8.79 ms per image patch)
+evaluated 613 image tokens in 63913863 us at 9.59103 tok/sec
+encode_image_with_clip: image embedding created: 576 tokens
+encode_image_with_clip: image encoded in 4649.30 ms by CLIP ( 8.07 ms per image patch)
+evaluated 613 image tokens in 66449226 us at 9.22509 tok/sec
+encode_image_with_clip: image embedding created: 576 tokens
+encode_image_with_clip: image encoded in 5040.41 ms by CLIP ( 8.75 ms per image patch)
+evaluated 613 image tokens in 64890561 us at 9.44667 tok/sec
+encode_image_with_clip: image embedding created: 576 tokens
+encode_image_with_clip: image encoded in 4951.46 ms by CLIP ( 8.60 ms per image patch)
+evaluated 613 image tokens in 70732757 us at 8.66642 tok/sec
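
Note (not part of the committed llama.log): the per-patch and tok/sec figures in the log are simple derivations from the raw values reported on the same lines. A minimal Python sketch, with constants copied from the first encode/evaluate pair, that reproduces them:

# Minimal sketch, not part of the patch: reproduces the derived figures
# that llama.log reports, from the raw values in its first encode/evaluate pair.
ms_encode = 5062.39      # "image encoded in 5062.39 ms by CLIP"
n_patches = 576          # "image embedding created: 576 tokens" = (336 / 14)**2 patches
us_eval = 63_913_863     # "evaluated 613 image tokens in 63913863 us"
n_tokens = 613

ms_per_patch = ms_encode / n_patches      # ~8.79 ms per image patch
tok_per_sec = n_tokens / (us_eval / 1e6)  # ~9.59103 tok/sec
print(f"{ms_per_patch:.2f} ms per image patch, {tok_per_sec:.5f} tok/sec")

The evaluated token count (613) is larger than the 576 image-patch embeddings, presumably because the surrounding prompt tokens are included in that timing.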