Skip to content

Commit

Permalink
convert-hf-to-gguf.py: use metadata override info to calc default fil…
Browse files Browse the repository at this point in the history
…ename
  • Loading branch information
mofosyne committed May 24, 2024
1 parent c704442 commit 438e88d
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 9 deletions.
25 changes: 16 additions & 9 deletions convert-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,15 +133,19 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
self.ftype = gguf.LlamaFileType.MOSTLY_BF16

# Generate default filename based on model specification and available metadata
version_string = None # TODO: Add metadata support
def get_model_name(metadata, dir_model):
    """Pick the model name used to build the default output filename.

    Preference order:
      1. ``metadata.name`` when a metadata override object is supplied and
         carries a non-empty name.
      2. ``dir_model.name`` (the final path component of the model directory).

    Returns ``None`` when neither source yields a usable name.

    ``Path.name`` is never ``None``; it is an empty string for paths such as
    ``Path(".")`` or the filesystem root. Truthiness checks are used so an
    empty name is treated as missing rather than being returned as ``""``.
    """
    if metadata is not None and metadata.name:
        return metadata.name
    if dir_model is not None and dir_model.name:
        return dir_model.name
    return None
def extract_encoding_scheme(ftype):
    """Derive the encoding-scheme label from a file type's enum member name.

    Everything after the first underscore of ``ftype.name`` is returned in
    upper case, e.g. 'gguf.LlamaFileType.ALL_F32' --> 'F32'. A name without
    an underscore yields an empty string.
    """
    _qualifier, _underscore, scheme = ftype.name.partition("_")
    return scheme.upper()
# Pass the metadata object itself, not metadata.name: get_model_name() performs
# the None check and reads `.name` on its first argument.
model_name = get_model_name(metadata, dir_model)
expert_count = self.hparams["num_local_experts"] if "num_local_experts" in self.hparams else None
encodingScheme = {
gguf.LlamaFileType.ALL_F32 : "F32",
gguf.LlamaFileType.MOSTLY_F16 : "F16",
gguf.LlamaFileType.MOSTLY_BF16 : "BF16",
gguf.LlamaFileType.MOSTLY_Q8_0 : "Q8_0",
}[self.ftype]
self.fname_default = f"{gguf.naming_convention(dir_model.name, version_string, expert_count, self.parameter_count(), encodingScheme)}"
self.fname_default = f"{gguf.naming_convention(model_name, self.metadata.version, expert_count, self.parameter_count(), extract_encoding_scheme(self.ftype))}"

# Filename Output
if fname_out is not None:
Expand All @@ -151,7 +155,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
# output in the same directory as the model by default
self.fname_out = dir_model.parent / self.fname_default

# allow templating the file name with the output ftype, useful with the "auto" ftype
# Configure GGUF Writer
self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)

@classmethod
Expand Down Expand Up @@ -324,6 +328,9 @@ def extra_f16_tensors(self, name: str, new_name: str, bid: int | None, n_dims: i
return False

def parameter_count(self):
# TODO: Ensure parameter count is accurate across the various model types.
# May currently overestimate the parameter count for Mamba models because
# the output weights are tied with the token embeddings.
total_model_parameters = 0
for name, data_torch in self.get_tensors():
# Got A Tensor
Expand Down
3 changes: 3 additions & 0 deletions convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -1319,6 +1319,9 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT


def model_parameter_count(model: LazyModel) -> int:
# TODO: Ensure parameter count is accurate across the various model types.
# May currently overestimate the parameter count for Mamba models because
# the output weights are tied with the token embeddings.
total_model_parameters = 0
for name, lazy_tensor in model.items():
# Got A Tensor
Expand Down

0 comments on commit 438e88d

Please sign in to comment.