Skip to content

Commit

Permalink
convert-*.py: GGUF Naming Convention Refactor and Metadata Override R…
Browse files Browse the repository at this point in the history
…efactor (#7499)

Main thing is that the default output filename will take this form

{name}{parameters}{finetune}{version}{encoding}{kind}

In addition this add and remove some entries in the KV store and adds a metadata class with automatic heuristics capability to derive some values based on model card content

* No Change:
  - Internal GGUF Spec
    - `general.architecture`
    - `general.quantization_version`
    - `general.alignment`
    - `general.file_type`
  - General Model Details
    - `general.name`
    - `general.author`
    - `general.version`
    - `general.description`
  - Licensing details
    - `general.license`
  - Typically represents the converted GGUF repo (Unless made from scratch)
    - `general.url`
  - Model Source during conversion
    - `general.source.url`

* Removed:
  - Model Source during conversion
    - `general.source.huggingface.repository`

* Added:
  - General Model Details
    - `general.organization`
    - `general.finetune`
    - `general.basename`
    - `general.quantized_by`
    - `general.size_label`
  - Licensing details
    - `general.license.name`
    - `general.license.link`
  - Typically represents the converted GGUF repo (Unless made from scratch)
    - `general.doi`
    - `general.uuid`
    - `general.repo_url`
  - Model Source during conversion
    - `general.source.doi`
    - `general.source.uuid`
    - `general.source.repo_url`
  - Base Model Source
    - `general.base_model.count`
    - `general.base_model.{id}.name`
    - `general.base_model.{id}.author`
    - `general.base_model.{id}.version`
    - `general.base_model.{id}.organization`
    - `general.base_model.{id}.url` (Model Website/Paper)
    - `general.base_model.{id}.doi`
    - `general.base_model.{id}.uuid`
    - `general.base_model.{id}.repo_url` (Model Source Repository (git/svn/etc...))
  - Array based KV stores
    - `general.tags`
    - `general.languages`
    - `general.datasets`

---------

Co-authored-by: compilade <git@compilade.net>
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
  • Loading branch information
3 people committed Jul 18, 2024
1 parent 3807c3d commit 672a6f1
Show file tree
Hide file tree
Showing 13 changed files with 1,177 additions and 231 deletions.
187 changes: 112 additions & 75 deletions convert_hf_to_gguf.py

Large diffs are not rendered by default.

29 changes: 19 additions & 10 deletions convert_lora_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,10 @@ def parse_args() -> argparse.Namespace:
"--verbose", action="store_true",
help="increase output verbosity",
)
parser.add_argument(
"--dry-run", action="store_true",
help="only print out what will be done, without writing any new files",
)
parser.add_argument(
"--base", type=Path, required=True,
help="directory containing base model file",
Expand Down Expand Up @@ -300,6 +304,12 @@ def parse_args() -> argparse.Namespace:
# load base model
logger.info(f"Loading base model: {dir_base_model.name}")
hparams = Model.load_hparams(dir_base_model)

with open(lora_config, "r") as f:
lparams: dict[str, Any] = json.load(f)

alpha: float = lparams["lora_alpha"]

with torch.inference_mode():
try:
model_class = Model.from_model_architecture(hparams["architectures"][0])
Expand All @@ -310,6 +320,14 @@ def parse_args() -> argparse.Namespace:
class LoraModel(model_class):
model_arch = model_class.model_arch

def set_type(self):
self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")

def set_gguf_parameters(self):
self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
super().set_gguf_parameters()

def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
tensor_map: dict[str, PartialLoraTensor] = {}

Expand Down Expand Up @@ -357,18 +375,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
is_big_endian=args.bigendian,
use_temp_file=False,
eager=args.no_lazy,
model_name=None,
dry_run=args.dry_run,
)

with open(lora_config, "r") as f:
lparams: dict[str, Any] = json.load(f)

alpha = lparams["lora_alpha"]

model_instance.gguf_writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.ADAPTER)
model_instance.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
model_instance.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
logger.info("Exporting model...")
model_instance.write()
logger.info(f"Model successfully exported to {model_instance.fname_out}")
235 changes: 128 additions & 107 deletions examples/convert_legacy_llama.py

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions gguf-py/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,5 +78,13 @@ python -m build
python -m twine upload dist/*
```

## Run Unit Tests

From root of this repository you can run this command to run all the unit tests

```bash
python -m unittest discover ./gguf-py -v
```

## TODO
- [ ] Include conversion scripts as command line entry points in this package.
2 changes: 2 additions & 0 deletions gguf-py/gguf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
from .quants import *
from .tensor_mapping import *
from .vocab import *
from .utility import *
from .metadata import *
68 changes: 54 additions & 14 deletions gguf-py/gguf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,60 @@

class Keys:
class General:
TYPE = "general.type"
ARCHITECTURE = "general.architecture"
QUANTIZATION_VERSION = "general.quantization_version"
ALIGNMENT = "general.alignment"
NAME = "general.name"
AUTHOR = "general.author"
VERSION = "general.version"
URL = "general.url"
DESCRIPTION = "general.description"
LICENSE = "general.license"
SOURCE_URL = "general.source.url"
SOURCE_HF_REPO = "general.source.huggingface.repository"
FILE_TYPE = "general.file_type"
TYPE = "general.type"
ARCHITECTURE = "general.architecture"
QUANTIZATION_VERSION = "general.quantization_version"
ALIGNMENT = "general.alignment"
FILE_TYPE = "general.file_type"

# Authorship Metadata
NAME = "general.name"
AUTHOR = "general.author"
VERSION = "general.version"
ORGANIZATION = "general.organization"

FINETUNE = "general.finetune"
BASENAME = "general.basename"

DESCRIPTION = "general.description"
QUANTIZED_BY = "general.quantized_by"

SIZE_LABEL = "general.size_label"

# Licensing details
LICENSE = "general.license"
LICENSE_NAME = "general.license.name"
LICENSE_LINK = "general.license.link"

# Typically represents the converted GGUF repo (Unless native)
URL = "general.url" # Model Website/Paper
DOI = "general.doi"
UUID = "general.uuid"
REPO_URL = "general.repo_url" # Model Source Repository (git/svn/etc...)

# Model Source during conversion
SOURCE_URL = "general.source.url" # Model Website/Paper
SOURCE_DOI = "general.source.doi"
SOURCE_UUID = "general.source.uuid"
SOURCE_REPO_URL = "general.source.repo_url" # Model Source Repository (git/svn/etc...)

# Base Model Source. There can be more than one source if it's a merged
# model like with 'Mistral-7B-Merge-14-v0.1'. This will assist in
# tracing linage of models as it is finetuned or merged over time.
BASE_MODEL_COUNT = "general.base_model.count"
BASE_MODEL_NAME = "general.base_model.{id}.name"
BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
BASE_MODEL_VERSION = "general.base_model.{id}.version"
BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
BASE_MODEL_URL = "general.base_model.{id}.url" # Model Website/Paper
BASE_MODEL_DOI = "general.base_model.{id}.doi"
BASE_MODEL_UUID = "general.base_model.{id}.uuid"
BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...)

# Array based KV stores
TAGS = "general.tags"
LANGUAGES = "general.languages"
DATASETS = "general.datasets"

class LLM:
VOCAB_SIZE = "{arch}.vocab_size"
Expand Down Expand Up @@ -1233,7 +1274,6 @@ def get_type(val: Any) -> GGUFValueType:
KEY_GENERAL_DESCRIPTION = Keys.General.DESCRIPTION
KEY_GENERAL_LICENSE = Keys.General.LICENSE
KEY_GENERAL_SOURCE_URL = Keys.General.SOURCE_URL
KEY_GENERAL_SOURCE_HF_REPO = Keys.General.SOURCE_HF_REPO
KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE

# LLM
Expand Down
158 changes: 140 additions & 18 deletions gguf-py/gguf/gguf_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import tempfile
from dataclasses import dataclass
from enum import Enum, auto
from math import prod
from pathlib import Path
from io import BufferedWriter
from typing import IO, Any, Sequence, Mapping
Expand Down Expand Up @@ -106,6 +107,53 @@ def __init__(

self.add_architecture()

def get_total_parameter_count(self) -> tuple[int, int, int, int]:
total_params = 0
shared_params = 0
expert_params = 0

expert_sum = 0
n_expert_tensors = 0

last_lora_a: tuple[str, TensorInfo] | None = None

for tensors in self.tensors:
for name, info in tensors.items():

shape = info.shape

if name.endswith(".lora_a"):
last_lora_a = (name, info)
continue
elif name.endswith(".lora_b"):
if last_lora_a is None or last_lora_a[0] != name[:-1] + "a":
# Bail when the LoRA pair can't be found trivially
logger.warning("can't measure LoRA size correctly, tensor order is unusual")
return 0, 0, 0, 0
else:
shape = (*shape[:-1], last_lora_a[1].shape[-1])

size = prod(shape)

if "_exps." in name:
expert_params += (size // shape[-3])
expert_sum += shape[-3]
n_expert_tensors += 1
else:
shared_params += size

total_params += size

# Hopefully this should work even for variable-expert-count models
expert_count = (expert_sum // n_expert_tensors) if n_expert_tensors > 0 else 0

# Negate the total to signal it's likely not exact
if last_lora_a is not None:
total_params = -total_params

# NOTE: keep the output in the same order as accepted by 'size_label' in gguf-py/gguf/utility.py
return total_params, shared_params, expert_params, expert_count

def format_shard_names(self, path: Path) -> list[Path]:
if len(self.tensors) == 1:
return [path]
Expand All @@ -115,6 +163,7 @@ def open_output_file(self, path: Path | None = None) -> None:
if self.state is WriterState.EMPTY and self.fout is not None and (path is None or path == self.path):
# allow calling this multiple times as long as the path is the same
return

if self.state is not WriterState.NO_FILE:
raise ValueError(f'Expected output file to be not yet opened, got {self.state}')

Expand All @@ -136,6 +185,8 @@ def print_plan(self) -> list[Path]:

if self.dry_run:
logger.info("Dry run, not writing files")
for name in filenames:
print(name) # noqa: NP100
exit()

return filenames
Expand Down Expand Up @@ -430,43 +481,114 @@ def add_type(self, type_name: str) -> None:
def add_architecture(self) -> None:
self.add_string(Keys.General.ARCHITECTURE, self.arch)

def add_quantization_version(self, quantization_version: int) -> None:
self.add_uint32(Keys.General.QUANTIZATION_VERSION, quantization_version)

def add_custom_alignment(self, alignment: int) -> None:
self.data_alignment = alignment
self.add_uint32(Keys.General.ALIGNMENT, alignment)

def add_file_type(self, ftype: int) -> None:
self.add_uint32(Keys.General.FILE_TYPE, ftype)

def add_name(self, name: str) -> None:
self.add_string(Keys.General.NAME, name)

def add_author(self, author: str) -> None:
self.add_string(Keys.General.AUTHOR, author)

def add_version(self, version: str) -> None:
self.add_string(Keys.General.VERSION, version)

def add_tensor_data_layout(self, layout: str) -> None:
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
def add_organization(self, organization: str) -> None:
self.add_string(Keys.General.ORGANIZATION, organization)

def add_url(self, url: str) -> None:
self.add_string(Keys.General.URL, url)
def add_finetune(self, finetune: str) -> None:
self.add_string(Keys.General.FINETUNE, finetune)

def add_basename(self, basename: str) -> None:
self.add_string(Keys.General.BASENAME, basename)

def add_description(self, description: str) -> None:
self.add_string(Keys.General.DESCRIPTION, description)

def add_licence(self, licence: str) -> None:
self.add_string(Keys.General.LICENSE, licence)
def add_quantized_by(self, quantized: str) -> None:
self.add_string(Keys.General.QUANTIZED_BY, quantized)

def add_size_label(self, size_label: str) -> None:
self.add_string(Keys.General.SIZE_LABEL, size_label)

def add_license(self, license: str) -> None:
self.add_string(Keys.General.LICENSE, license)

def add_license_name(self, license: str) -> None:
self.add_string(Keys.General.LICENSE_NAME, license)

def add_license_link(self, license: str) -> None:
self.add_string(Keys.General.LICENSE_LINK, license)

def add_url(self, url: str) -> None:
self.add_string(Keys.General.URL, url)

def add_doi(self, doi: str) -> None:
self.add_string(Keys.General.DOI, doi)

def add_uuid(self, uuid: str) -> None:
self.add_string(Keys.General.UUID, uuid)

def add_repo_url(self, repo_url: str) -> None:
self.add_string(Keys.General.REPO_URL, repo_url)

def add_source_url(self, url: str) -> None:
self.add_string(Keys.General.SOURCE_URL, url)

def add_source_hf_repo(self, repo: str) -> None:
self.add_string(Keys.General.SOURCE_HF_REPO, repo)
def add_source_doi(self, doi: str) -> None:
self.add_string(Keys.General.SOURCE_DOI, doi)

def add_file_type(self, ftype: int) -> None:
self.add_uint32(Keys.General.FILE_TYPE, ftype)
def add_source_uuid(self, uuid: str) -> None:
self.add_string(Keys.General.SOURCE_UUID, uuid)

def add_name(self, name: str) -> None:
self.add_string(Keys.General.NAME, name)
def add_source_repo_url(self, repo_url: str) -> None:
self.add_string(Keys.General.SOURCE_REPO_URL, repo_url)

def add_quantization_version(self, quantization_version: int) -> None:
self.add_uint32(
Keys.General.QUANTIZATION_VERSION, quantization_version)
def add_base_model_count(self, source_count: int) -> None:
self.add_uint32(Keys.General.BASE_MODEL_COUNT, source_count)

def add_custom_alignment(self, alignment: int) -> None:
self.data_alignment = alignment
self.add_uint32(Keys.General.ALIGNMENT, alignment)
def add_base_model_name(self, source_id: int, name: str) -> None:
self.add_string(Keys.General.BASE_MODEL_NAME.format(id=source_id), name)

def add_base_model_author(self, source_id: int, author: str) -> None:
self.add_string(Keys.General.BASE_MODEL_AUTHOR.format(id=source_id), author)

def add_base_model_version(self, source_id: int, version: str) -> None:
self.add_string(Keys.General.BASE_MODEL_VERSION.format(id=source_id), version)

def add_base_model_organization(self, source_id: int, organization: str) -> None:
self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)

def add_base_model_url(self, source_id: int, url: str) -> None:
self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)

def add_base_model_doi(self, source_id: int, doi: str) -> None:
self.add_string(Keys.General.BASE_MODEL_DOI.format(id=source_id), doi)

def add_base_model_uuid(self, source_id: int, uuid: str) -> None:
self.add_string(Keys.General.BASE_MODEL_UUID.format(id=source_id), uuid)

def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)

def add_tags(self, tags: Sequence[str]) -> None:
self.add_array(Keys.General.TAGS, tags)

def add_languages(self, languages: Sequence[str]) -> None:
self.add_array(Keys.General.LANGUAGES, languages)

def add_datasets(self, datasets: Sequence[str]) -> None:
self.add_array(Keys.General.DATASETS, datasets)

def add_tensor_data_layout(self, layout: str) -> None:
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)

def add_vocab_size(self, size: int) -> None:
self.add_uint32(Keys.LLM.VOCAB_SIZE.format(arch=self.arch), size)
Expand Down
Loading

0 comments on commit 672a6f1

Please sign in to comment.