Thewh1teagle/patch 1 #488

Merged
merged 3 commits on Aug 31, 2024
9 changes: 1 addition & 8 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[workspace]
resolver = "2"
members = ["llama-cpp-sys-2", "llama-cpp-2", "embeddings", "examples/usage", "examples/simple"]
members = ["llama-cpp-sys-2", "llama-cpp-2", "embeddings", "examples/simple"]

[workspace.dependencies]
# core library deps
20 changes: 10 additions & 10 deletions examples/usage/src/main.rs → examples/usage.rs
@@ -1,29 +1,30 @@
//! # Usage
//!
//!
//! This is just about the smallest possible way to do inference. To fetch a model from Hugging Face:
//!
//! ```bash
//!
//! ```console
//! git clone --recursive https://github.com/utilityai/llama-cpp-rs
//! cd llama-cpp-rs/examples/usage
//! wget https://huggingface.co/Qwen/Qwen2-1.5B-Instruct-GGUF/resolve/main/qwen2-1_5b-instruct-q4_0.gguf
//! cargo run --bin usage -- qwen2-1_5b-instruct-q4_0.gguf
//! cargo run --example usage -- qwen2-1_5b-instruct-q4_0.gguf
//! ```
use std::io::Write;
use llama_cpp_2::context::params::LlamaContextParams;
use llama_cpp_2::llama_backend::LlamaBackend;
use llama_cpp_2::llama_batch::LlamaBatch;
use llama_cpp_2::model::params::LlamaModelParams;
use llama_cpp_2::model::LlamaModel;
use llama_cpp_2::model::{AddBos, Special};
use llama_cpp_2::token::data_array::LlamaTokenDataArray;
use std::io::Write;

#[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)]
fn main() {
let model_path = std::env::args().nth(1).expect("Please specify model path");
let backend = LlamaBackend::init().unwrap();
let params = LlamaModelParams::default();

let prompt = "<|im_start|>user\nHello! how are you?<|im_end|>\n<|im_start|>assistant\n".to_string();
let prompt =
"<|im_start|>user\nHello! how are you?<|im_end|>\n<|im_start|>assistant\n".to_string();
LlamaContextParams::default();
let model =
LlamaModel::load_from_file(&backend, model_path, &params).expect("unable to load model");
@@ -48,14 +49,11 @@ fn main() {
}
ctx.decode(&mut batch).expect("llama_decode() failed");


let mut n_cur = batch.n_tokens();


// The `Decoder`
let mut decoder = encoding_rs::UTF_8.new_decoder();


while n_cur <= n_len {
// sample the next token
{
@@ -72,7 +70,9 @@ fn main() {
break;
}

let output_bytes = model.token_to_bytes(new_token_id, Special::Tokenize).unwrap();
let output_bytes = model
.token_to_bytes(new_token_id, Special::Tokenize)
.unwrap();
// use `Decoder.decode_to_string()` to avoid the intermediate buffer
let mut output_string = String::with_capacity(32);
let _decode_result = decoder.decode_to_string(&output_bytes, &mut output_string, false);
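The relocated example streams model output through an incremental UTF-8 decoder rather than collecting all bytes first. Below is a minimal, standalone sketch of that `encoding_rs` pattern (the chunk bytes and capacity are invented for the demo): `decode_to_string` appends directly into an existing `String` and buffers multi-byte sequences that are split across chunks, which is exactly the situation when tokens are converted to bytes one at a time.

```rust
// Minimal sketch of the streaming decode pattern used in examples/usage.rs;
// assumes only the `encoding_rs` crate, which this PR adds as a dev-dependency.
fn main() {
    let mut decoder = encoding_rs::UTF_8.new_decoder();
    // "€!" with the three-byte euro sign split across two chunks.
    let chunks: [&[u8]; 2] = [&[0xE2, 0x82], &[0xAC, b'!']];
    // `decode_to_string` writes into spare capacity, so reserve some up front.
    let mut out = String::with_capacity(32);
    for chunk in chunks {
        // `false` = more input may follow, so an incomplete sequence is buffered.
        let _ = decoder.decode_to_string(chunk, &mut out, false);
    }
    // Signal end of stream and flush any pending decoder state.
    let _ = decoder.decode_to_string(&[], &mut out, true);
    assert_eq!(out, "€!");
}
```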
19 changes: 0 additions & 19 deletions examples/usage/Cargo.toml

This file was deleted.

7 changes: 7 additions & 0 deletions llama-cpp-2/Cargo.toml
@@ -14,6 +14,9 @@ llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.69" }
thiserror = { workspace = true }
tracing = { workspace = true }

[dev-dependencies]
encoding_rs = { workspace = true }

[features]
cuda = ["llama-cpp-sys-2/cuda"]
metal = ["llama-cpp-sys-2/metal"]
@@ -32,3 +35,7 @@ workspace = true

[package.metadata.docs.rs]
features = ["sampler"]

[[example]]
name = "usage"
path = "../examples/usage.rs"
2 changes: 1 addition & 1 deletion llama-cpp-2/src/context/sample/sampler.rs
@@ -3,7 +3,7 @@
//! like [`crate::context::LlamaContext`] or token history to the sampler.
//!
//! # Example
//!
//!
//! **Llama.cpp default sampler**
//!
//! ```rust
8 changes: 6 additions & 2 deletions llama-cpp-2/src/model.rs
@@ -1,6 +1,6 @@
//! A safe wrapper around `llama_model`.
use std::ffi::CString;
use std::ffi::CStr;
use std::ffi::CString;
use std::num::NonZeroU16;
use std::os::raw::c_int;
use std::path::Path;
@@ -550,7 +550,7 @@ impl LlamaModel {
if res > buff.len() as i32 {
return Err(ApplyChatTemplateError::BuffSizeError);
}
Ok::<String, ApplyChatTemplateError>(CStr::from_ptr(buff.as_mut_ptr()).to_string_lossy().to_string())
Ok::<String, ApplyChatTemplateError>(
CStr::from_ptr(buff.as_mut_ptr())
.to_string_lossy()
.to_string(),
)
}?;
Ok(formatted_chat)
}
8 changes: 2 additions & 6 deletions llama-cpp-2/src/model/params/kv_overrides.rs
@@ -33,17 +33,13 @@ impl ParamOverrideValue {
llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_bool: *value }
}
ParamOverrideValue::Float(value) => {
llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 {
val_f64: *value,
}
llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_f64: *value }
}
ParamOverrideValue::Int(value) => {
llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_i64: *value }
}
ParamOverrideValue::Str(c_string) => {
llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 {
val_str: *c_string,
}
llama_cpp_sys_2::llama_model_kv_override__bindgen_ty_1 { val_str: *c_string }
}
}
}
2 changes: 1 addition & 1 deletion llama-cpp-2/src/token_type.rs
@@ -42,7 +42,7 @@ impl TryFrom<llama_cpp_sys_2::llama_token_type> for LlamaTokenAttrs {
type Error = LlamaTokenTypeFromIntError;

fn try_from(value: llama_cpp_sys_2::llama_vocab_type) -> Result<Self, Self::Error> {
Ok(Self(BitFlags::from_bits(value).map_err(|e| {
Ok(Self(BitFlags::from_bits(value as _).map_err(|e| {
LlamaTokenTypeFromIntError::UnknownValue(e.invalid_bits())
})?))
}
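The only functional change in this file is the `as _` cast in front of `BitFlags::from_bits`. As a rough, self-contained illustration of that cast pattern (assuming `BitFlags` comes from the `enumflags2` crate, with a hypothetical flags enum standing in for the real token attributes, and assuming the bindgen-generated value arrives as an `i32` while the flags enum is backed by a `u32`):

```rust
use enumflags2::{bitflags, BitFlags};

// Hypothetical stand-in for the crate's token-attribute flags.
#[bitflags]
#[repr(u32)]
#[derive(Copy, Clone, Debug, PartialEq)]
enum TokenAttr {
    Unknown = 1 << 0,
    Unused = 1 << 1,
    Normal = 1 << 2,
}

fn main() {
    // bindgen commonly exposes C enums as `i32` on mainstream targets, while the
    // flags enum above is `u32`-backed, so the raw value is cast before conversion.
    let raw: i32 = 0b110;
    let attrs = BitFlags::<TokenAttr>::from_bits(raw as _).expect("unknown bits set");
    assert!(attrs.contains(TokenAttr::Normal));
}
```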
49 changes: 29 additions & 20 deletions llama-cpp-sys-2/build.rs
@@ -91,11 +91,9 @@ fn compile_bindings(
llama_header_path: &Path,
) -> Result<(), Box<dyn std::error::Error + 'static>> {
println!("Generating bindings..");

let includes = [
llama_header_path.join("ggml").join("include"),
];


let includes = [llama_header_path.join("ggml").join("include")];

let bindings = bindgen::Builder::default()
.clang_args(includes.map(|path| format!("-I{}", path.to_string_lossy())))
.header(
@@ -425,9 +423,7 @@ fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) -> &'stati
// nvcc.flag("-Wno-pedantic");
// }

for lib in [
"cuda", "cublas", "cudart", "cublasLt"
] {
for lib in ["cuda", "cublas", "cudart", "cublasLt"] {
println!("cargo:rustc-link-lib={}", lib);
}
if !nvcc.get_compiler().is_like_msvc() {
@@ -623,31 +619,44 @@ fn gen_vulkan_shaders(out_path: impl AsRef<Path>) -> (impl AsRef<Path>, impl AsR
.cpp(true)
.get_compiler();

assert!(!cxx.is_like_msvc(), "Compiling Vulkan GGML with MSVC is not supported at this time.");
assert!(
!cxx.is_like_msvc(),
"Compiling Vulkan GGML with MSVC is not supported at this time."
);

let vulkan_shaders_gen_bin = out_path.as_ref().join("vulkan-shaders-gen");

cxx.to_command()
.args([
vulkan_shaders_src.join("vulkan-shaders-gen.cpp").as_os_str(),
"-o".as_ref(), vulkan_shaders_gen_bin.as_os_str()
vulkan_shaders_src
.join("vulkan-shaders-gen.cpp")
.as_os_str(),
"-o".as_ref(),
vulkan_shaders_gen_bin.as_os_str(),
])
.output().expect("Could not compile Vulkan shader generator");
.output()
.expect("Could not compile Vulkan shader generator");

let header = out_path.as_ref().join("ggml-vulkan-shaders.hpp");
let source = out_path.as_ref().join("ggml-vulkan-shaders.cpp");

Command::new(vulkan_shaders_gen_bin)
.args([
"--glslc".as_ref(), "glslc".as_ref(),
"--input-dir".as_ref(), vulkan_shaders_src.as_os_str(),
"--output-dir".as_ref(), out_path.as_ref().join("vulkan-shaders.spv").as_os_str(),
"--target-hpp".as_ref(), header.as_os_str(),
"--target-cpp".as_ref(), source.as_os_str(),
"--no-clean".as_ref()
"--glslc".as_ref(),
"glslc".as_ref(),
"--input-dir".as_ref(),
vulkan_shaders_src.as_os_str(),
"--output-dir".as_ref(),
out_path.as_ref().join("vulkan-shaders.spv").as_os_str(),
"--target-hpp".as_ref(),
header.as_os_str(),
"--target-cpp".as_ref(),
source.as_os_str(),
"--no-clean".as_ref(),
])
.output().expect("Could not run Vulkan shader generator");

.output()
.expect("Could not run Vulkan shader generator");

(out_path, source)
}
