src: update llama.cpp submodule

Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com>
sobelio · Dec 5, 2023 · commit b667780
1 parent 922c1b9
Showing 2 changed files with 195 additions and 1 deletion.
diff --git a/crates/llm-chain-llama-sys/llama.cpp b/crates/llm-chain-llama-sys/llama.cpp
diff --git a/crates/llm-chain-llama-sys/src/bindings.rs b/crates/llm-chain-llama-sys/src/bindings.rs
@@ -2113,6 +2113,14 @@ extern "C" {
 extern "C" {
     pub fn ggml_soft_max_inplace(ctx: *mut ggml_context, a: *mut ggml_tensor) -> *mut ggml_tensor;
 }
+extern "C" {
+    pub fn ggml_soft_max_ext(
+        ctx: *mut ggml_context,
+        a: *mut ggml_tensor,
+        mask: *mut ggml_tensor,
+        scale: f32,
+    ) -> *mut ggml_tensor;
+}
 extern "C" {
     pub fn ggml_soft_max_back(
         ctx: *mut ggml_context,
@@ -3878,6 +3886,12 @@ extern "C" {
         key_id: ::std::os::raw::c_int,
     ) -> *const ::std::os::raw::c_char;
 }
+extern "C" {
+    pub fn gguf_get_val_data(
+        ctx: *const gguf_context,
+        key_id: ::std::os::raw::c_int,
+    ) -> *const ::std::os::raw::c_void;
+}
 extern "C" {
     pub fn gguf_get_arr_n(
         ctx: *const gguf_context,
@@ -6291,6 +6305,33 @@ extern "C" {
 extern "C" {
     pub fn llama_rope_freq_scale_train(model: *const llama_model) -> f32;
 }
+extern "C" {
+    pub fn llama_model_meta_val_str(
+        model: *const llama_model,
+        key: *const ::std::os::raw::c_char,
+        buf: *mut ::std::os::raw::c_char,
+        buf_size: usize,
+    ) -> ::std::os::raw::c_int;
+}
+extern "C" {
+    pub fn llama_model_meta_count(model: *const llama_model) -> ::std::os::raw::c_int;
+}
+extern "C" {
+    pub fn llama_model_meta_key_by_index(
+        model: *const llama_model,
+        i: ::std::os::raw::c_int,
+        buf: *mut ::std::os::raw::c_char,
+        buf_size: usize,
+    ) -> ::std::os::raw::c_int;
+}
+extern "C" {
+    pub fn llama_model_meta_val_str_by_index(
+        model: *const llama_model,
+        i: ::std::os::raw::c_int,
+        buf: *mut ::std::os::raw::c_char,
+        buf_size: usize,
+    ) -> ::std::os::raw::c_int;
+}
 extern "C" {
     pub fn llama_model_desc(
         model: *const llama_model,
@@ -6335,9 +6376,162 @@ extern "C" {
         n_threads: ::std::os::raw::c_int,
     ) -> ::std::os::raw::c_int;
 }
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct llama_kv_cache_view_cell {
+    pub pos: llama_pos,
+}
+#[test]
+fn bindgen_test_layout_llama_kv_cache_view_cell() {
+    const UNINIT: ::std::mem::MaybeUninit<llama_kv_cache_view_cell> =
+        ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<llama_kv_cache_view_cell>(),
+        4usize,
+        concat!("Size of: ", stringify!(llama_kv_cache_view_cell))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<llama_kv_cache_view_cell>(),
+        4usize,
+        concat!("Alignment of ", stringify!(llama_kv_cache_view_cell))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).pos) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(llama_kv_cache_view_cell),
+            "::",
+            stringify!(pos)
+        )
+    );
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct llama_kv_cache_view {
+    pub n_cells: i32,
+    pub n_max_seq: i32,
+    pub token_count: i32,
+    pub used_cells: i32,
+    pub max_contiguous: i32,
+    pub max_contiguous_idx: i32,
+    pub cells: *mut llama_kv_cache_view_cell,
+    pub cells_sequences: *mut llama_seq_id,
+}
+#[test]
+fn bindgen_test_layout_llama_kv_cache_view() {
+    const UNINIT: ::std::mem::MaybeUninit<llama_kv_cache_view> = ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<llama_kv_cache_view>(),
+        40usize,
+        concat!("Size of: ", stringify!(llama_kv_cache_view))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<llama_kv_cache_view>(),
+        8usize,
+        concat!("Alignment of ", stringify!(llama_kv_cache_view))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).n_cells) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(llama_kv_cache_view),
+            "::",
+            stringify!(n_cells)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).n_max_seq) as usize - ptr as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(llama_kv_cache_view),
+            "::",
+            stringify!(n_max_seq)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).token_count) as usize - ptr as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(llama_kv_cache_view),
+            "::",
+            stringify!(token_count)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).used_cells) as usize - ptr as usize },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(llama_kv_cache_view),
+            "::",
+            stringify!(used_cells)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).max_contiguous) as usize - ptr as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(llama_kv_cache_view),
+            "::",
+            stringify!(max_contiguous)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).max_contiguous_idx) as usize - ptr as usize },
+        20usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(llama_kv_cache_view),
+            "::",
+            stringify!(max_contiguous_idx)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).cells) as usize - ptr as usize },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(llama_kv_cache_view),
+            "::",
+            stringify!(cells)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).cells_sequences) as usize - ptr as usize },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(llama_kv_cache_view),
+            "::",
+            stringify!(cells_sequences)
+        )
+    );
+}
+extern "C" {
+    pub fn llama_kv_cache_view_init(
+        ctx: *const llama_context,
+        n_max_seq: i32,
+    ) -> llama_kv_cache_view;
+}
+extern "C" {
+    pub fn llama_kv_cache_view_free(view: *mut llama_kv_cache_view);
+}
+extern "C" {
+    pub fn llama_kv_cache_view_update(ctx: *const llama_context, view: *mut llama_kv_cache_view);
+}
 extern "C" {
     pub fn llama_get_kv_cache_token_count(ctx: *const llama_context) -> ::std::os::raw::c_int;
 }
+extern "C" {
+    pub fn llama_get_kv_cache_used_cells(ctx: *const llama_context) -> ::std::os::raw::c_int;
+}
 extern "C" {
     pub fn llama_kv_cache_clear(ctx: *mut llama_context);
 }
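Submodule crates/llm-chain-llama-sys/llama.cpp updated, 60 files changed (additions / deletions per file):
+4 −0		.devops/tools.sh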
+11 −0		.github/workflows/build.yml
+14 −12		.gitignore
+10 −1		CMakeLists.txt
+22 −12		Makefile
+16 −30		Package.swift
+9 −4		README.md
+7 −2		common/CMakeLists.txt
+79 −0		common/common.cpp
+11 −0		common/common.h
+1 −1		common/grammar-parser.cpp
+145 −15		convert-hf-to-gguf.py
+1 −1		convert.py
BIN		docs/llama-star/idea-arch.key
BIN		docs/llama-star/idea-arch.pdf
+1 −0		examples/CMakeLists.txt
+1 −1		examples/batched-bench/batched-bench.cpp
+1 −1		examples/batched.swift/README.md
+7 −10		examples/batched.swift/Sources/main.swift
+1 −0		examples/llama.swiftui/.gitignore
+7 −0		examples/llama.swiftui/README.md
+208 −0		examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
+5 −0		examples/llama.swiftui/llama.cpp.swift/bridging-header.h
+481 −0		examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj
+7 −0		examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/contents.xcworkspacedata
+8 −0		examples/llama.swiftui/llama.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
+11 −0		examples/llama.swiftui/llama.swiftui/Assets.xcassets/AccentColor.colorset/Contents.json
+13 −0		examples/llama.swiftui/llama.swiftui/Assets.xcassets/AppIcon.appiconset/Contents.json
+6 −0		examples/llama.swiftui/llama.swiftui/Assets.xcassets/Contents.json
+45 −0		examples/llama.swiftui/llama.swiftui/Models/LlamaState.swift
+6 −0		examples/llama.swiftui/llama.swiftui/Preview Content/Preview Assets.xcassets/Contents.json
+0 −0		examples/llama.swiftui/llama.swiftui/Resources/models/.gitignore
+42 −0		examples/llama.swiftui/llama.swiftui/UI/ContentView.swift
+10 −0		examples/llama.swiftui/llama.swiftui/llama_swiftuiApp.swift
+37 −15		examples/llava/convert-image-encoder-to-gguf.py
+5 −0		examples/lookahead/CMakeLists.txt
+7 −0		examples/lookahead/README.md
+487 −0		examples/lookahead/lookahead.cpp
+7 −0		examples/main/main.cpp
+10 −1		examples/parallel/parallel.cpp
+49 −0		examples/server/README.md
+26 −21		examples/server/api_like_OAI.py
+497 −21		examples/server/server.cpp
+1 −1		examples/simple/simple.cpp
+8 −0		examples/speculative/README.md
+1 −1		ggml-alloc.c
+111 −59		ggml-cuda.cu
+30 −18		ggml-metal.m
+93 −93		ggml-metal.metal
+5 −7		ggml-opencl.cpp
+72 −42		ggml.c
+10 −3		ggml.h
+20 −0		gguf-py/gguf/constants.py
+10 −8		gguf-py/gguf/tensor_mapping.py
+434 −58		llama.cpp
+55 −4		llama.h
+1 −0		prompts/chat-with-qwen.txt
+3 −0		requirements-hf-to-gguf.txt
+0 −22		scripts/build-info.cmake
+24 −0		scripts/gen-build-info-cpp.cmake