From 587443b6c554cfa699dc1e542aa7d9dd811b3a6a Mon Sep 17 00:00:00 2001
From: Stefanache Ion
Date: Sun, 8 Sep 2024 10:35:07 +0300
Subject: [PATCH] Add files via upload

---
 python/llamafile_llava/llama.log | 59 ++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 python/llamafile_llava/llama.log

diff --git a/python/llamafile_llava/llama.log b/python/llamafile_llava/llama.log
new file mode 100644
index 000000000..5219818ab
--- /dev/null
+++ b/python/llamafile_llava/llama.log
@@ -0,0 +1,59 @@
+clip_model_load: model name: openai/clip-vit-large-patch14-336
+clip_model_load: description: image encoder for LLaVA
+clip_model_load: GGUF version: 3
+clip_model_load: alignment: 32
+clip_model_load: n_tensors: 377
+clip_model_load: n_kv: 19
+clip_model_load: ftype: q4_0
+clip_model_load: loaded meta data with 19 key-value pairs and 377 tensors from llava-v1.5-7b-mmproj-Q4_0.gguf
+clip_model_load: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+clip_model_load: - kv 0: general.architecture str = clip
+clip_model_load: - kv 1: clip.has_text_encoder bool = false
+clip_model_load: - kv 2: clip.has_vision_encoder bool = true
+clip_model_load: - kv 3: clip.has_llava_projector bool = true
+clip_model_load: - kv 4: general.file_type u32 = 2
+clip_model_load: - kv 5: general.name str = openai/clip-vit-large-patch14-336
+clip_model_load: - kv 6: general.description str = image encoder for LLaVA
+clip_model_load: - kv 7: clip.vision.image_size u32 = 336
+clip_model_load: - kv 8: clip.vision.patch_size u32 = 14
+clip_model_load: - kv 9: clip.vision.embedding_length u32 = 1024
+clip_model_load: - kv 10: clip.vision.feed_forward_length u32 = 4096
+clip_model_load: - kv 11: clip.vision.projection_dim u32 = 768
+clip_model_load: - kv 12: clip.vision.attention.head_count u32 = 16
+clip_model_load: - kv 13: clip.vision.attention.layer_norm_epsilon f32 = 0.000010
+clip_model_load: - kv 14: clip.vision.block_count u32 = 23
+clip_model_load: - kv 15: clip.vision.image_mean arr[f32,3] = [0.481455, 0.457828, 0.408211]
+clip_model_load: - kv 16: clip.vision.image_std arr[f32,3] = [0.268630, 0.261303, 0.275777]
+clip_model_load: - kv 17: clip.use_gelu bool = false
+clip_model_load: - kv 18: general.quantization_version u32 = 2
+clip_model_load: - type f32: 235 tensors
+clip_model_load: - type f16: 1 tensors
+clip_model_load: - type q4_0: 141 tensors
+clip_model_load: CLIP using CPU backend
+clip_model_load: text_encoder: 0
+clip_model_load: vision_encoder: 1
+clip_model_load: llava_projector: 1
+clip_model_load: model size: 169.18 MB
+clip_model_load: metadata size: 0.17 MB
+clip_model_load: params backend buffer size = 169.18 MB (377 tensors)
+get_key_idx: note: key clip.vision.image_grid_pinpoints not found in file
+get_key_idx: note: key clip.vision.mm_patch_merge_type not found in file
+get_key_idx: note: key clip.vision.image_crop_resolution not found in file
+clip_model_load: compute allocated memory: 32.89 MB
+warming up the model with an empty run
+
+llama server listening at http://127.0.0.1:8080
+
+warning: this OS doesn't support pledge() security
+encode_image_with_clip: image embedding created: 576 tokens
+encode_image_with_clip: image encoded in 5062.39 ms by CLIP ( 8.79 ms per image patch)
+evaluated 613 image tokens in 63913863 us at 9.59103 tok/sec
+encode_image_with_clip: image embedding created: 576 tokens
+encode_image_with_clip: image encoded in 4649.30 ms by CLIP ( 8.07 ms per image patch)
+evaluated 613 image tokens in 66449226 us at 9.22509 tok/sec
+encode_image_with_clip: image embedding created: 576 tokens
+encode_image_with_clip: image encoded in 5040.41 ms by CLIP ( 8.75 ms per image patch)
+evaluated 613 image tokens in 64890561 us at 9.44667 tok/sec
+encode_image_with_clip: image embedding created: 576 tokens
+encode_image_with_clip: image encoded in 4951.46 ms by CLIP ( 8.60 ms per image patch)
+evaluated 613 image tokens in 70732757 us at 8.66642 tok/sec
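
Note (not part of the committed llama.log): the per-patch and tok/sec figures in the log are simple derivations from the raw values reported on the same lines. A minimal Python sketch, with constants copied from the first encode/evaluate pair, that reproduces them:

# Minimal sketch, not part of the patch: reproduces the derived figures
# that llama.log reports, from the raw values in its first encode/evaluate pair.
ms_encode = 5062.39      # "image encoded in 5062.39 ms by CLIP"
n_patches = 576          # "image embedding created: 576 tokens" = (336 / 14)**2 patches
us_eval = 63_913_863     # "evaluated 613 image tokens in 63913863 us"
n_tokens = 613

ms_per_patch = ms_encode / n_patches      # ~8.79 ms per image patch
tok_per_sec = n_tokens / (us_eval / 1e6)  # ~9.59103 tok/sec
print(f"{ms_per_patch:.2f} ms per image patch, {tok_per_sec:.5f} tok/sec")

The evaluated token count (613) is larger than the 576 image-patch embeddings, presumably because the surrounding prompt tokens are included in that timing.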