-
Notifications
You must be signed in to change notification settings - Fork 308
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for multiple GGUF files (#692)
* Add multi gguf support * Allow passing multiple files * Clippy * Typo
- Loading branch information
1 parent
13f5655
commit 66dba85
Showing
25 changed files
with
453 additions
and
347 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
use std::{collections::HashMap, fs}; | ||
|
||
use anyhow::Context; | ||
use candle_core::{ | ||
quantized::{ | ||
gguf_file::{self, Value}, | ||
QTensor, | ||
}, | ||
Device, Result, | ||
}; | ||
use indexmap::IndexMap; | ||
use tracing::info; | ||
|
||
use crate::DEBUG; | ||
|
||
use super::GGUFArchitecture; | ||
|
||
fn parse_gguf_value(value: &Value) -> String { | ||
match value { | ||
Value::Array(vs) => vs | ||
.iter() | ||
.map(parse_gguf_value) | ||
.collect::<Vec<String>>() | ||
.join(", "), | ||
Value::Bool(b) => b.to_string(), | ||
Value::F32(x) => x.to_string(), | ||
Value::F64(x) => x.to_string(), | ||
Value::I8(x) => x.to_string(), | ||
Value::I16(x) => x.to_string(), | ||
Value::I32(x) => x.to_string(), | ||
Value::I64(x) => x.to_string(), | ||
Value::String(x) => x.to_string(), | ||
Value::U8(x) => x.to_string(), | ||
Value::U16(x) => x.to_string(), | ||
Value::U32(x) => x.to_string(), | ||
Value::U64(x) => x.to_string(), | ||
} | ||
} | ||
|
||
// Internal invariant: contents and readers must be paired.
/// This abstracts the files for a GGUF model and enables multiple files to be used.
pub struct Content<'a, R: std::io::Seek + std::io::Read> {
    /// Parsed GGUF headers, one per reader, in the same order as `readers`.
    contents: Vec<gguf_file::Content>,
    /// Underlying file readers; index `i` backs `contents[i]`.
    readers: &'a mut [&'a mut R],
    /// Architecture taken from the `general.architecture` metadata key.
    arch: GGUFArchitecture,
    /// Union of every file's metadata; later files overwrite duplicate keys.
    all_metadata: HashMap<String, Value>,
}
|
||
impl<'a, R: std::io::Seek + std::io::Read> Content<'a, R> {
    /// Create a `Content` from a set of file readers.
    ///
    /// Parses each reader's GGUF header up front. If any file declares a
    /// `split.count`, the number of readers must equal that count, and at
    /// most one file may declare it.
    ///
    /// # Panics
    /// Panics if a `split.count` value is not convertible to `u64`, if the
    /// declared architecture fails to parse, or if no file declares
    /// `general.architecture`.
    pub fn from_readers(readers: &'a mut [&'a mut R]) -> Result<Self> {
        let mut contents = Vec::new();
        let n_readers = readers.len();
        for reader in readers.iter_mut() {
            contents.push(gguf_file::Content::read(reader)?);
        }
        // Collect every `split.count` value declared across the files.
        // NOTE(review): `to_u64().unwrap()` panics on a malformed value
        // rather than returning an error — intentional? confirm.
        let n_splits = contents
            .iter()
            .filter_map(|ct| {
                ct.metadata
                    .get("split.count")
                    .map(|val| val.to_u64().unwrap())
            })
            .collect::<Vec<_>>();
        if n_splits.len() > 1 {
            candle_core::bail!("Multiple contents have multiple `split.count` fields");
        }
        #[allow(clippy::cast_possible_truncation)]
        if !n_splits.is_empty() && n_readers != n_splits[0] as usize {
            candle_core::bail!("Number of readers does not match the number of splits.");
        } else if n_splits.len() == 1 {
            info!("Model n splits: {}", n_splits[0]);
        }

        // Find the file declaring `general.architecture`; if several do,
        // the last one wins.
        let mut arch = None;
        for ct in &contents {
            if !ct.metadata.contains_key("general.architecture") {
                continue;
            }

            arch = Some(
                ct.metadata["general.architecture"]
                    .to_string()
                    .context("Model metadata should have declared an architecture")
                    .and_then(GGUFArchitecture::from_value)
                    .unwrap(),
            );
        }
        let arch = arch.expect("GGUF files must specify `general.architecture`");

        // Merge all per-file metadata; later files overwrite duplicate keys.
        let mut all_metadata = HashMap::new();
        for content in &contents {
            all_metadata.extend(content.metadata.clone())
        }

        Ok(Self {
            contents,
            readers,
            arch,
            all_metadata,
        })
    }

    /// The architecture declared by `general.architecture`.
    pub fn arch(&self) -> GGUFArchitecture {
        self.arch
    }

    /// Retrieve a tensor, searching through each content.
    ///
    /// Returns the tensor from the first file whose header lists `name`;
    /// errors if no file contains it.
    pub fn tensor(&mut self, name: &str, device: &Device) -> Result<QTensor> {
        for (ct, reader) in self.contents.iter().zip(self.readers.iter_mut()) {
            if let Some(tensor_info) = ct.tensor_infos.get(name) {
                return tensor_info.read(reader, ct.tensor_data_offset, device);
            }
        }
        candle_core::bail!("Cannot find tensor info for {name}")
    }

    /// Print metadata for these contents.
    /// This will also log tensor name, shape and dtype to `mistralrs_gguf_tensors.txt` if DEBUG is enabled.
    pub fn print_metadata(&self) -> anyhow::Result<()> {
        // Aggregate metadata keys (and, in debug mode, tensor descriptions)
        // from every file.
        let mut keys = Vec::new();
        let mut metadatas = Vec::new();
        let mut tensors = Vec::new();
        for ct in &self.contents {
            keys.extend(ct.metadata.keys());
            metadatas.push(&ct.metadata);

            if DEBUG.load(std::sync::atomic::Ordering::Relaxed) {
                for (name, info) in &ct.tensor_infos {
                    tensors.push(format!(
                        "name = `{name}`, shape = {:?}, dtype = {:?}",
                        info.shape.clone(),
                        info.ggml_dtype
                    ));
                }
            }
        }

        info!("Model config:");
        keys.sort();
        // IndexMap keeps the sorted key order stable for printing; tokenizer
        // keys are skipped as they are large and not useful as "config".
        let mut output_keys = IndexMap::new();
        for name in keys {
            if !name.contains("tokenizer") {
                for metadata in &metadatas {
                    if let Some(val) = metadata.get(name) {
                        output_keys.insert(name, parse_gguf_value(val));
                    }
                }
            }
        }
        for (name, val) in output_keys {
            println!("{name}: {val}")
        }

        if DEBUG.load(std::sync::atomic::Ordering::Relaxed) {
            fs::write(
                "mistralrs_gguf_tensors.txt",
                serde_json::to_string_pretty(&tensors).expect("Serialization failed."),
            )?;

            info!("Debug is enabled, wrote the names and information about each tensor to `mistralrs_gguf_tensors.txt`.");
        }

        anyhow::Ok(())
    }

    /// Get all metadatas (the merged metadata of every file).
    pub fn get_metadata(&self) -> &HashMap<String, Value> {
        &self.all_metadata
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,40 @@ | ||
mod chat_template; | ||
mod content; | ||
mod gguf_tokenizer; | ||
use strum::EnumString; | ||
|
||
pub use chat_template::get_gguf_chat_template; | ||
use anyhow::{Context, Result}; | ||
pub(crate) use chat_template::get_gguf_chat_template; | ||
pub(crate) use content::Content; | ||
pub(crate) use gguf_tokenizer::{convert_gguf_to_hf_tokenizer, GgufTokenizerConversion}; | ||
use std::str::FromStr; | ||
|
||
/// Delimiter separating multiple GGUF file names packed into a single string
/// (presumably split by callers when accepting multi-file model paths —
/// confirm against usage sites).
pub const GGUF_MULTI_FILE_DELIMITER: &str = " ";
|
||
#[derive(Debug, EnumString, Clone, Copy)] | ||
#[strum(serialize_all = "kebab-case")] | ||
pub enum GGUFArchitecture { | ||
Llama, | ||
Mpt, | ||
Gptneox, | ||
Gptj, | ||
Gpt2, | ||
Bloom, | ||
Falcon, | ||
Mamba, | ||
Rwkv, | ||
Phi2, | ||
Phi3, | ||
Starcoder2, | ||
} | ||
|
||
// Wraps from_str() for some convenience: | ||
// - Case-insensitive variant matching (TODO: is this desirable?) | ||
// - Customized error until potential upstream support: https://github.com/Peternator7/strum/issues/332 | ||
impl GGUFArchitecture { | ||
pub fn from_value<T: AsRef<str> + std::fmt::Display>(value: T) -> Result<Self> { | ||
Self::from_str(&value.as_ref().to_ascii_lowercase()) | ||
.with_context(|| format!("Unknown GGUF architecture `{value}`")) | ||
.map_err(anyhow::Error::msg) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.