From 438e88db6f1b2fab7192a8a942704913478de272 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Fri, 24 May 2024 13:41:27 +1000 Subject: [PATCH] convert-hf-to-gguf.py: use metadata override info to calc default filename --- convert-hf-to-gguf.py | 25 ++++++++++++++++--------- convert.py | 3 +++ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 7bfe48e82213c5..8f0748f681e7e5 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -133,15 +133,19 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.ftype = gguf.LlamaFileType.MOSTLY_BF16 # Generate default filename based on model specification and available metadata + def get_model_name(metadata, dir_model): + if metadata is not None and metadata.name is not None: + return metadata.name + elif dir_model is not None and dir_model.name is not None: + return dir_model.name + return None + def extract_encoding_scheme(ftype): + # Extracts and converts the encoding scheme from the given file type name. + # e.g.
'gguf.LlamaFileType.ALL_F32' --> 'F32' + return ftype.name.partition("_")[2].upper() + model_name = get_model_name(metadata, dir_model) expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None - encodingScheme = { - gguf.LlamaFileType.ALL_F32 : "F32", - gguf.LlamaFileType.MOSTLY_F16 : "F16", - gguf.LlamaFileType.MOSTLY_BF16 : "BF16", - gguf.LlamaFileType.MOSTLY_Q8_0 : "Q8_0", - }[self.ftype] - self.fname_default = f"{gguf.naming_convention(dir_model.name, version_string, expert_count, self.parameter_count(), encodingScheme)}" + self.fname_default = f"{gguf.naming_convention(model_name, self.metadata.version, expert_count, self.parameter_count(), extract_encoding_scheme(self.ftype))}" # Filename Output if fname_out is not None: @@ -151,7 +155,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, # output in the same directory as the model by default self.fname_out = dir_model.parent / self.fname_default - # allow templating the file name with the output ftype, useful with the "auto" ftype + # Configure GGUF Writer self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file) @classmethod @@ -324,6 +328,9 @@ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: i return False def parameter_count(self): + # TODO: Ensure parameter count is accurate throughout various model types + # May currently overestimate parameter count in Mamba model because + # output weights are tied with token embeddings.
total_model_parameters = 0 for name, data_torch in self.get_tensors(): # Got A Tensor diff --git a/convert.py b/convert.py index 71f52cec6f1519..475746d1409ec3 100755 --- a/convert.py +++ b/convert.py @@ -1319,6 +1319,9 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT def model_parameter_count(model: LazyModel) -> int: + # TODO: Ensure parameter count is accurate throughout various model types + # May currently overestimate parameter count in Mamba model because + # output weights are tied with token embeddings. total_model_parameters = 0 for name, lazy_tensor in model.items(): # Got A Tensor