
Commit

Merge branch 'main' into openai
williamhogman authored Jan 10, 2024
2 parents eebd969 + b7b769c commit 520940d
Showing 3 changed files with 11 additions and 6 deletions.
crates/llm-chain-llama/src/model.rs (9 additions & 4 deletions)
@@ -6,7 +6,7 @@ use std::ptr::null_mut;
 pub struct ModelParams {
     pub n_gpu_layers: i32,
     pub main_gpu: i32,
-    pub tensor_split: Vec<f32>,
+    pub tensor_split: Option<Vec<f32>>,
     pub vocab_only: bool,
     pub use_mmap: bool,
     pub use_mlock: bool,
@@ -33,10 +33,15 @@ impl Default for ModelParams {

 impl From<ModelParams> for llama_model_params {
     fn from(params: ModelParams) -> Self {
+        let tensor_split = if let Some(tensor_split_vec) = params.tensor_split {
+            tensor_split_vec.as_ptr() as *const f32
+        } else {
+            std::ptr::null()
+        };
         llama_model_params {
             n_gpu_layers: params.n_gpu_layers,
             main_gpu: params.main_gpu,
-            tensor_split: params.tensor_split.as_ptr() as *const f32,
+            tensor_split,
             vocab_only: params.vocab_only,
             use_mmap: params.use_mmap,
             use_mlock: params.use_mlock,
@@ -50,11 +55,11 @@ impl From<llama_model_params> for ModelParams {
     fn from(params: llama_model_params) -> Self {
         let tensor_split = unsafe {
             if params.tensor_split.is_null() {
-                Vec::new()
+                None
             } else {
                 let slice =
                     std::slice::from_raw_parts(params.tensor_split, LLAMA_MAX_DEVICES as usize);
-                slice.to_vec()
+                Some(slice.to_vec())
             }
         };
         ModelParams {
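The conversion in the second hunk hands llama.cpp either a pointer into the configured split vector or a null pointer when no split is set. Below is a minimal, self-contained sketch of that Option-to-pointer pattern; tensor_split_ptr and the main function are illustrative names, not part of the crate. Borrowing the Vec rather than moving it keeps the allocation alive while the pointer is in use, which is the main hazard to watch with this pattern.

    // Map Option<Vec<f32>> to the raw pointer an FFI struct expects,
    // mirroring the From<ModelParams> conversion above.
    fn tensor_split_ptr(tensor_split: &Option<Vec<f32>>) -> *const f32 {
        match tensor_split {
            Some(v) => v.as_ptr(),    // pointer into the caller's Vec
            None => std::ptr::null(), // "no split configured" sentinel
        }
    }

    fn main() {
        let split = Some(vec![0.5_f32, 0.5]); // e.g. split layers evenly across two GPUs
        assert!(!tensor_split_ptr(&split).is_null());
        assert!(tensor_split_ptr(&None).is_null());
    }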
crates/llm-chain-llama/src/options.rs (1 addition & 1 deletion)
@@ -113,7 +113,7 @@ lazy_static! {
         StopSequence: vec!["\n\n".to_string()],
         NGpuLayers: 0_i32,
         MainGpu: 0_i32,
-        TensorSplit: Vec::new(),
+        TensorSplit: None,
         VocabOnly: false,
         UseMmap: true,
         UseMlock: false
crates/llm-chain/src/options.rs (1 addition & 1 deletion)
@@ -403,7 +403,7 @@ pub enum Opt {
     // The GPU that should be used for scratch and small tensors for llm-chain-llama.
     MainGpu(i32),
     // How the layers should be split across the available GPUs for llm-chain-llama.
-    TensorSplit(Vec<f32>),
+    TensorSplit(Option<Vec<f32>>),
     // Only load the vocabulary for llm-chain-llama, no weights will be loaded.
     VocabOnly(bool),
     // Use memory mapped files for llm-chain-llama where possible.
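With this change, callers express "no tensor split" as None rather than an empty vector. A hypothetical usage sketch follows; the Opt enum here is a local one-variant stand-in for the llm_chain::options::Opt shown in the hunk above, so the snippet compiles on its own.

    // Local stand-in mirroring the TensorSplit variant above; in real code
    // this would be llm_chain::options::Opt.
    #[derive(Debug)]
    enum Opt {
        TensorSplit(Option<Vec<f32>>),
    }

    fn main() {
        // Put 70% of the layers on GPU 0 and 30% on GPU 1.
        let custom = Opt::TensorSplit(Some(vec![0.7, 0.3]));
        // Leave splitting to the library (ends up as a null tensor_split pointer).
        let unset = Opt::TensorSplit(None);
        println!("{:?} / {:?}", custom, unset);
    }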
