From 061356241297c0f389ea743ce932a302961aed75 Mon Sep 17 00:00:00 2001
From: chenqiny
Date: Mon, 2 Oct 2023 04:28:22 -0400
Subject: [PATCH 1/8] check whether platform is 390x if yes->do not import
 immintrin.h

---
 k_quants.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/k_quants.c b/k_quants.c
index 62085882df71c..7db605c11ee56 100644
--- a/k_quants.c
+++ b/k_quants.c
@@ -46,7 +46,7 @@ inline static int32_t vaddvq_s32(int32x4_t v) {
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <intrin.h>
 #else
-#if !defined(__riscv)
+#if !defined(__riscv) && !defined(__s390__)
 #include <immintrin.h>
 #endif
 #endif
From fa62c8c73a58f4f6ac88097a936862f90c666e0e Mon Sep 17 00:00:00 2001
From: chenqiny
Date: Sun, 8 Oct 2023 11:47:39 +0800
Subject: [PATCH 2/8] support s390x big endian

---
 convert.py                  |  1 +
 gguf-py/gguf/gguf.py        | 49 +++++++++++++++++++-------------------
 tests/test-double-float.cpp |  2 ++
 3 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/convert.py b/convert.py
index 8bb6c7e410852..20e27aa426cfc 100755
--- a/convert.py
+++ b/convert.py
@@ -947,6 +947,7 @@ def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyM
             elapsed = time.time() - start
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
+            ndarray.byteswap(inplace=True)
             print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}")
             of.gguf.write_tensor_data(ndarray)

diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 598cf8e594aa8..2e997f72a11d1 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -22,6 +22,7 @@
 GGUF_VERSION = 2
 GGUF_DEFAULT_ALIGNMENT = 32

+
 # general
 KEY_GENERAL_ARCHITECTURE = "general.architecture"
 KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
@@ -428,7 +429,6 @@ class GGMLQuantizationType(IntEnum):
     Q6_K = 14
     Q8_K = 15

-
 class GGUFValueType(IntEnum):
     UINT8 = 0
     INT8 = 1
@@ -483,10 +483,10 @@ def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True
         self.tensors = []

     def write_header_to_file(self):
-        self.fout.write(struct.pack("<I", GGUF_MAGIC))
-        self.fout.write(struct.pack("<I", GGUF_VERSION))
-        self.fout.write(struct.pack("<Q", self.ti_data_count))
-        self.fout.write(struct.pack("<Q", self.kv_data_count))
+        self.fout.write(struct.pack(">I", GGUF_MAGIC))
+        self.fout.write(struct.pack(">I", GGUF_VERSION))
+        self.fout.write(struct.pack(">Q", self.ti_data_count))
+        self.fout.write(struct.pack(">Q", self.kv_data_count))
         self.flush()
         # print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
@@ -559,16 +559,16 @@ def add_array(self, key: str, val: Sequence[Any]):
         self.add_val(val, GGUFValueType.ARRAY)

     _simple_value_packing = {
-        GGUFValueType.UINT8:   "<B",
-        GGUFValueType.INT8:    "<b",
-        GGUFValueType.UINT16:  "<H",
-        GGUFValueType.INT16:   "<h",
-        GGUFValueType.UINT32:  "<I",
-        GGUFValueType.INT32:   "<i",
-        GGUFValueType.FLOAT32: "<f",
-        GGUFValueType.UINT64:  "<Q",
-        GGUFValueType.INT64:   "<q",
-        GGUFValueType.FLOAT64: "<d",
+        GGUFValueType.UINT8:   f"{GGUF_ENDIANESS}B",
+        GGUFValueType.INT8:    f"{GGUF_ENDIANESS.}b",
+        GGUFValueType.UINT16:  f"{GGUF_ENDIANESS.get}H",
+        GGUFValueType.INT16:   ">h",
+        GGUFValueType.UINT32:  ">I",
+        GGUFValueType.INT32:   ">i",
+        GGUFValueType.FLOAT32: ">f",
+        GGUFValueType.UINT64:  ">Q",
+        GGUFValueType.INT64:   ">q",
+        GGUFValueType.FLOAT64: ">d",
         GGUFValueType.BOOL:    "?" ,
     }
@@ -576,7 +576,7 @@ def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool
             vtype = GGUFValueType.get_type(val)

         if add_vtype:
-            self.kv_data += struct.pack("<I", vtype)
+            self.kv_data += struct.pack(">I", vtype)
             self.kv_data_count += 1

         pack_fmt = self._simple_value_packing.get(vtype)
@@ -584,14 +584,14 @@ def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool
             self.kv_data += struct.pack(pack_fmt, val)
         elif vtype == GGUFValueType.STRING:
             encoded_val = val.encode("utf8") if isinstance(val, str) else val
-            self.kv_data += struct.pack("<Q", len(encoded_val))
+            self.kv_data += struct.pack(">Q", len(encoded_val))
             self.kv_data += encoded_val
         elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
             ltype = GGUFValueType.get_type(val[0])
             if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                 raise ValueError("All items in a GGUF array should be of the same type")
-            self.kv_data += struct.pack("<I", ltype)
-            self.kv_data += struct.pack("<Q", len(val))
+            self.kv_data += struct.pack(">I", ltype)
+            self.kv_data += struct.pack(">Q", len(val))
             for item in val:
                 self.add_val(item, add_vtype=False)
         else:
@@ -605,22 +605,23 @@ def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype:
         assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"

         encoded_name = name.encode("utf8")
-        self.ti_data += struct.pack("<Q", len(encoded_name))
+        self.ti_data += struct.pack(">Q", len(encoded_name))
         self.ti_data += encoded_name
         n_dims = len(tensor_shape)
-        self.ti_data += struct.pack("<I", n_dims)
+        self.ti_data += struct.pack(">I", n_dims)
         for i in range(n_dims):
-            self.ti_data += struct.pack("<Q", tensor_shape[n_dims - 1 - i])
+            self.ti_data += struct.pack(">Q", tensor_shape[n_dims - 1 - i])
         if raw_dtype is None:
             dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
         else:
             dtype = raw_dtype
-        self.ti_data += struct.pack("<I", dtype)
-        self.ti_data += struct.pack("<Q", self.offset_tensor)
+        self.ti_data += struct.pack(">I", dtype)
+        self.ti_data += struct.pack(">Q", self.offset_tensor)
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1

     def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
+        tensor.byteswap(inplace=True)
         if self.use_temp_file and self.temp_file is None:
             fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
             fp.seek(0)

diff --git a/tests/test-double-float.cpp b/tests/test-double-float.cpp
index b506f273fee9f..afd7bf77fcb55 100644
--- a/tests/test-double-float.cpp
+++ b/tests/test-double-float.cpp
@@ -4,7 +4,9 @@
 #undef NDEBUG
 #include <cassert>
+#if !defined(__riscv) && !defined(__s390__)
 #include <immintrin.h>
+#endif
 #include <cmath>
 #include <cstdint>
 #include <cstring>
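Note, as a standalone illustration separate from the patch itself: in Python's struct module the format prefix selects byte order, which is why the patch flips "<" (little-endian) to ">" (big-endian); numpy arrays bypass struct entirely, so tensor payloads must be byte-swapped on their own. A minimal sketch of both effects:

    import struct
    import numpy as np

    GGUF_MAGIC = 0x46554747  # the bytes "GGUF" when laid out little-endian

    print(struct.pack("<I", GGUF_MAGIC))  # b'GGUF' - little-endian layout
    print(struct.pack(">I", GGUF_MAGIC))  # b'FUGG' - big-endian layout

    # Tensor data is written via numpy, not struct, so each element's bytes
    # must be reordered explicitly when targeting a big-endian file:
    a = np.array([1.0], dtype=np.float32)
    print(a.tobytes().hex())                          # 0000803f on a little-endian host
    print(a.byteswap(inplace=False).tobytes().hex())  # 3f800000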
From 1ce890a7c0007564dc57c8c70ff37fec83973d12 Mon Sep 17 00:00:00 2001
From: chenqiny
Date: Mon, 9 Oct 2023 10:40:41 +0800
Subject: [PATCH 3/8] support --bigendian option for s390x

1. verified with baichuan7b-chat with float 16 on s390x
2. verified with baichuan7b-chat
3. verified with chinese-alpaca-2-13b-f16

---
 convert-baichuan-hf-to-gguf.py |  7 +++-
 convert.py                     | 23 ++++++-----
 gguf-py/gguf/gguf.py           | 73 +++++++++++++++++++++-------------
 3 files changed, 65 insertions(+), 38 deletions(-)

diff --git a/convert-baichuan-hf-to-gguf.py b/convert-baichuan-hf-to-gguf.py
index 8bd34dc440769..eac2663fc7a63 100755
--- a/convert-baichuan-hf-to-gguf.py
+++ b/convert-baichuan-hf-to-gguf.py
@@ -73,6 +73,7 @@ def parse_args() -> argparse.Namespace:
         "ftype", type=int, choices=[0, 1], default=1, nargs='?',
         help="output format - use 0 for float32, 1 for float16",
     )
+    parser.add_argument("--bigendian", action="store_true", help="model is executed on big endian machine")
     return parser.parse_args()

 args = parse_args()
@@ -83,6 +84,10 @@ def parse_args() -> argparse.Namespace:
     print(f'Error: {args.model} is not a directory', file = sys.stderr)
     sys.exit(1)

+endianess =gguf.GGUFEndian.LITTLE
+if args.bigendian:
+    endianess = gguf.GGUFEndian.BIG
+print(f"gguf: Conversion Endianess {endianess}")
 # possible tensor data types
 # ftype == 0 -> float32
 # ftype == 1 -> float16
@@ -110,7 +115,7 @@ def parse_args() -> argparse.Namespace:
 num_parts = count_model_parts(dir_model)
 print(f"num_parts:{num_parts}\n")
 ARCH=gguf.MODEL_ARCH.BAICHUAN
-gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
+gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)

 print("gguf: get model metadata")

diff --git a/convert.py b/convert.py
index 20e27aa426cfc..444b04ca6b6bc 100755
--- a/convert.py
+++ b/convert.py
@@ -818,8 +818,8 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:

 class OutputFile:
-    def __init__(self, fname_out: Path) -> None:
-        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
+    def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian=gguf.GGUFEndian.LITTLE) -> None:
+        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)

     def add_meta_arch(self, params: Params) -> None:
         name = "LLaMA"
@@ -890,10 +890,10 @@ def close(self) -> None:
         self.gguf.close()

     @staticmethod
-    def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab) -> None:
+    def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianess:gguf.GGUFEndian=gguf.GGUFEndian.LITTLE) -> None:
         check_vocab_size(params, vocab)

-        of = OutputFile(fname_out)
+        of = OutputFile(fname_out, endianess=endianess)

         # meta data
         of.add_meta_arch(params)
@@ -918,10 +918,10 @@ def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
         return dt.quantize(arr)

     @staticmethod
-    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY) -> None:
+    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess=gguf.GGUFEndian.LITTLE) -> None:
         check_vocab_size(params, vocab)

-        of = OutputFile(fname_out)
+        of = OutputFile(fname_out, endianess=endianess)

         # meta data
         of.add_meta_arch(params)
@@ -947,7 +947,8 @@ def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyM
             elapsed = time.time() - start
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
-            ndarray.byteswap(inplace=True)
+            if endianess==gguf.GGUFEndian.BIG:
+                ndarray.byteswap(inplace=True)
             print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}")
             of.gguf.write_tensor_data(ndarray)

@@ -1139,8 +1140,9 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
     parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
     parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default = DEFAULT_CONCURRENCY)
-    args = parser.parse_args(args_in)
+    parser.add_argument("--bigendian", action="store_true", help="model is executed on big endian machine")

+    args = parser.parse_args(args_in)
     if args.dump_single:
         model_plus = lazy_load_file(args.model)
         do_dump_model(model_plus)
@@ -1154,6 +1156,9 @@ def main(args_in: list[str] | None = None) -> None:
     if args.dump:
         do_dump_model(model_plus)
         return
+    endianess = gguf.GGUFEndian.LITTLE
+    if args.bigendian:
+        endianess = gguf.GGUFEndian.BIG

     params = Params.load(model_plus)
     if params.n_ctx == -1:
@@ -1201,7 +1206,7 @@ def main(args_in: list[str] | None = None) -> None:
     params.ftype = ftype
     print(f"Writing {outfile}, format {ftype}")

-    OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency)
+    OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianess=endianess)
     print(f"Wrote {outfile}")
diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 2e997f72a11d1..bcb543eaedce1 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -429,6 +429,11 @@ class GGMLQuantizationType(IntEnum):
     Q6_K = 14
     Q8_K = 15

+class GGUFEndian(IntEnum):
+    LITTLE = 0
+    BIG = 1
+
+
 class GGUFValueType(IntEnum):
     UINT8 = 0
     INT8 = 1
@@ -475,18 +480,41 @@ class GGUFWriter:
     temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
     tensors: list[tuple[np.ndarray[Any, Any], int]]

-    def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True):
+    def get_pack_prefix(self):
+        if self.endianess==GGUFEndian.LITTLE:
+            return "<"
+        else:
+            return ">"
+
+    def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True, endianess=GGUFEndian.LITTLE):
         self.fout = open(path, "wb")
         self.arch = arch
+        self.endianess = endianess
+        self._simple_value_packing = {
+            GGUFValueType.UINT8: f"{self.get_pack_prefix()}B",
+            GGUFValueType.INT8: f"{self.get_pack_prefix()}b",
+            GGUFValueType.UINT16: f"{self.get_pack_prefix()}H",
+            GGUFValueType.INT16: f"{self.get_pack_prefix()}h",
+            GGUFValueType.UINT32: f"{self.get_pack_prefix()}I",
+            GGUFValueType.INT32: f"{self.get_pack_prefix()}i",
+            GGUFValueType.FLOAT32: f"{self.get_pack_prefix()}f",
+            GGUFValueType.UINT64: f"{self.get_pack_prefix()}Q",
+            GGUFValueType.INT64: f"{self.get_pack_prefix()}q",
+            GGUFValueType.FLOAT64: f"{self.get_pack_prefix()}d",
+            GGUFValueType.BOOL: "?" ,
+        }
         self.add_architecture()
         self.use_temp_file = use_temp_file
         self.tensors = []
+
+
+        print(f"This gguf file is for {self.endianess} only")

     def write_header_to_file(self):
-        self.fout.write(struct.pack(">I", GGUF_MAGIC))
-        self.fout.write(struct.pack(">I", GGUF_VERSION))
-        self.fout.write(struct.pack(">Q", self.ti_data_count))
-        self.fout.write(struct.pack(">Q", self.kv_data_count))
+        self.fout.write(struct.pack(f"{self.get_pack_prefix()}I", GGUF_MAGIC))
+        self.fout.write(struct.pack(f"{self.get_pack_prefix()}I", GGUF_VERSION))
+        self.fout.write(struct.pack(f"{self.get_pack_prefix()}Q", self.ti_data_count))
+        self.fout.write(struct.pack(f"{self.get_pack_prefix()}Q", self.kv_data_count))
         self.flush()
         # print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
@@ -558,25 +586,13 @@ def add_array(self, key: str, val: Sequence[Any]):
         self.add_key(key)
         self.add_val(val, GGUFValueType.ARRAY)

-    _simple_value_packing = {
-        GGUFValueType.UINT8: f"{GGUF_ENDIANESS}B",
-        GGUFValueType.INT8: f"{GGUF_ENDIANESS.}b",
-        GGUFValueType.UINT16: f"{GGUF_ENDIANESS.get}H",
-        GGUFValueType.INT16: ">h",
-        GGUFValueType.UINT32: ">I",
-        GGUFValueType.INT32: ">i",
-        GGUFValueType.FLOAT32: ">f",
-        GGUFValueType.UINT64: ">Q",
-        GGUFValueType.INT64: ">q",
-        GGUFValueType.FLOAT64: ">d",
-        GGUFValueType.BOOL: "?" ,
-    }
+
     def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True):
         if vtype is None:
             vtype = GGUFValueType.get_type(val)

         if add_vtype:
-            self.kv_data += struct.pack(">I", vtype)
+            self.kv_data += struct.pack(f"{self.get_pack_prefix()}I", vtype)
             self.kv_data_count += 1

         pack_fmt = self._simple_value_packing.get(vtype)
@@ -584,14 +600,14 @@ def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool
             self.kv_data += struct.pack(pack_fmt, val)
         elif vtype == GGUFValueType.STRING:
             encoded_val = val.encode("utf8") if isinstance(val, str) else val
-            self.kv_data += struct.pack(">Q", len(encoded_val))
+            self.kv_data += struct.pack(f"{self.get_pack_prefix()}Q", len(encoded_val))
             self.kv_data += encoded_val
         elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
             ltype = GGUFValueType.get_type(val[0])
             if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                 raise ValueError("All items in a GGUF array should be of the same type")
-            self.kv_data += struct.pack(">I", ltype)
-            self.kv_data += struct.pack(">Q", len(val))
+            self.kv_data += struct.pack(f"{self.get_pack_prefix()}I", ltype)
+            self.kv_data += struct.pack(f"{self.get_pack_prefix()}Q", len(val))
             for item in val:
                 self.add_val(item, add_vtype=False)
         else:
@@ -605,22 +621,24 @@ def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype:
         assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"

         encoded_name = name.encode("utf8")
-        self.ti_data += struct.pack(">Q", len(encoded_name))
+        self.ti_data += struct.pack(f"{self.get_pack_prefix()}Q", len(encoded_name))
         self.ti_data += encoded_name
         n_dims = len(tensor_shape)
-        self.ti_data += struct.pack(">I", n_dims)
+        self.ti_data += struct.pack(f"{self.get_pack_prefix()}I", n_dims)
         for i in range(n_dims):
-            self.ti_data += struct.pack(">Q", tensor_shape[n_dims - 1 - i])
+            self.ti_data += struct.pack(f"{self.get_pack_prefix()}Q", tensor_shape[n_dims - 1 - i])
         if raw_dtype is None:
             dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
         else:
             dtype = raw_dtype
-        self.ti_data += struct.pack(">I", dtype)
-        self.ti_data += struct.pack(">Q", self.offset_tensor)
+        self.ti_data += struct.pack(f"{self.get_pack_prefix()}I", dtype)
+        self.ti_data += struct.pack(f"{self.get_pack_prefix()}Q", self.offset_tensor)
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1

     def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
-        tensor.byteswap(inplace=True)
+        if self.endianess == GGUFEndian.BIG:
+            tensor.byteswap(inplace=True)
         if self.use_temp_file and self.temp_file is None:
             fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
             fp.seek(0)
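The selection logic this patch introduces is small enough to demonstrate in isolation. A self-contained sketch (names mirror the patch; the GGUFEndian values are the ones defined above):

    import struct
    from enum import IntEnum

    class GGUFEndian(IntEnum):  # as added to gguf.py in this patch
        LITTLE = 0
        BIG = 1

    def pack_prefix(endianess: GGUFEndian) -> str:
        # the same decision GGUFWriter.get_pack_prefix() makes
        return "<" if endianess == GGUFEndian.LITTLE else ">"

    # every header, KV, and tensor-info field is then packed with that prefix:
    for e in (GGUFEndian.LITTLE, GGUFEndian.BIG):
        print(e.name, struct.pack(f"{pack_prefix(e)}I", 2).hex())
    # LITTLE 02000000
    # BIG    00000002

With the new flag, a big-endian file would be produced by something like `python convert.py <model dir> --bigendian` (the model path is an example).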
, + } self.add_architecture() self.use_temp_file = use_temp_file self.tensors = [] + + + print(f"This gguf file is for {self.endianess} only") def write_header_to_file(self): - self.fout.write(struct.pack(">I", GGUF_MAGIC)) - self.fout.write(struct.pack(">I", GGUF_VERSION)) - self.fout.write(struct.pack(">Q", self.ti_data_count)) - self.fout.write(struct.pack(">Q", self.kv_data_count)) + self.fout.write(struct.pack(f"{self.get_pack_prefix()}I", GGUF_MAGIC)) + self.fout.write(struct.pack(f"{self.get_pack_prefix()}I", GGUF_VERSION)) + self.fout.write(struct.pack(f"{self.get_pack_prefix()}Q", self.ti_data_count)) + self.fout.write(struct.pack(f"{self.get_pack_prefix()}Q", self.kv_data_count)) self.flush() # print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count)) @@ -558,25 +586,13 @@ def add_array(self, key: str, val: Sequence[Any]): self.add_key(key) self.add_val(val, GGUFValueType.ARRAY) - _simple_value_packing = { - GGUFValueType.UINT8: f"{GGUF_ENDIANESS}B", - GGUFValueType.INT8: f"{GGUF_ENDIANESS.}b", - GGUFValueType.UINT16: f"{GGUF_ENDIANESS.get}H", - GGUFValueType.INT16: ">h", - GGUFValueType.UINT32: ">I", - GGUFValueType.INT32: ">i", - GGUFValueType.FLOAT32: ">f", - GGUFValueType.UINT64: ">Q", - GGUFValueType.INT64: ">q", - GGUFValueType.FLOAT64: ">d", - GGUFValueType.BOOL: "?" , - } + def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True): if vtype is None: vtype = GGUFValueType.get_type(val) if add_vtype: - self.kv_data += struct.pack(">I", vtype) + self.kv_data += struct.pack(f"{self.get_pack_prefix()}I", vtype) self.kv_data_count += 1 pack_fmt = self._simple_value_packing.get(vtype) @@ -584,14 +600,14 @@ def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool self.kv_data += struct.pack(pack_fmt, val) elif vtype == GGUFValueType.STRING: encoded_val = val.encode("utf8") if isinstance(val, str) else val - self.kv_data += struct.pack(">Q", len(encoded_val)) + self.kv_data += struct.pack(f"{self.get_pack_prefix()}Q", len(encoded_val)) self.kv_data += encoded_val elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0: ltype = GGUFValueType.get_type(val[0]) if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]): raise ValueError("All items in a GGUF array should be of the same type") - self.kv_data += struct.pack(">I", ltype) - self.kv_data += struct.pack(">Q", len(val)) + self.kv_data += struct.pack(f"{self.get_pack_prefix()}I", ltype) + self.kv_data += struct.pack(f"{self.get_pack_prefix()}Q", len(val)) for item in val: self.add_val(item, add_vtype=False) else: @@ -605,23 +621,24 @@ def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype: assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now" encoded_name = name.encode("utf8") - self.ti_data += struct.pack(">Q", len(encoded_name)) + self.ti_data += struct.pack(f"{self.get_pack_prefix()}Q", len(encoded_name)) self.ti_data += encoded_name n_dims = len(tensor_shape) - self.ti_data += struct.pack(">I", n_dims) + self.ti_data += struct.pack(f"{self.get_pack_prefix()}I", n_dims) for i in range(n_dims): - self.ti_data += struct.pack(">Q", tensor_shape[n_dims - 1 - i]) + self.ti_data += struct.pack(f"{self.get_pack_prefix()}Q", tensor_shape[n_dims - 1 - i]) if raw_dtype is None: dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16 else: dtype = raw_dtype - self.ti_data += struct.pack(">I", dtype) - 
From 51e9d39117cb99aa48e00917402563eaf7bda1d1 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 11 Oct 2023 09:55:15 +0300
Subject: [PATCH 5/8] Update convert-baichuan-hf-to-gguf.py

---
 convert-baichuan-hf-to-gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert-baichuan-hf-to-gguf.py b/convert-baichuan-hf-to-gguf.py
index 8054823cf027b..c1c080f608584 100755
--- a/convert-baichuan-hf-to-gguf.py
+++ b/convert-baichuan-hf-to-gguf.py
@@ -87,7 +87,7 @@ def parse_args() -> argparse.Namespace:
     print(f'Error: {args.model} is not a directory', file = sys.stderr)
     sys.exit(1)

-endianess =gguf.GGUFEndian.LITTLE
+endianess = gguf.GGUFEndian.LITTLE
 if args.bigendian:
     endianess = gguf.GGUFEndian.BIG
 print(f"gguf: Conversion Endianess {endianess}")
From 7fc0250d1556de0353cfe31bfeea313cd758f57b Mon Sep 17 00:00:00 2001
From: chenqiny
Date: Fri, 13 Oct 2023 00:23:16 +0800
Subject: [PATCH 6/8] 1. check in ggml.c if endianness does not match 2. update
 GGUF version 3. change get_pack_prefix to property 4. update information log

---
 convert-baichuan-hf-to-gguf.py |  1 +
 ggml.c                         |  8 ++++-
 ggml.h                         |  3 +-
 gguf-py/gguf/gguf.py           | 54 ++++++++++++++++++----------------
 gguf-py/pyproject.toml         |  2 +-
 5 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/convert-baichuan-hf-to-gguf.py b/convert-baichuan-hf-to-gguf.py
index c1c080f608584..a1783f71fb668 100755
--- a/convert-baichuan-hf-to-gguf.py
+++ b/convert-baichuan-hf-to-gguf.py
@@ -90,6 +90,7 @@ def parse_args() -> argparse.Namespace:
 endianess = gguf.GGUFEndian.LITTLE
 if args.bigendian:
     endianess = gguf.GGUFEndian.BIG
+endianess_str = "Big Endian" if args.bigendian else "Little Endian"
 print(f"gguf: Conversion Endianess {endianess}")
 # possible tensor data types
 # ftype == 0 -> float32

diff --git a/ggml.c b/ggml.c
index 6d1776ca46741..04b88c98a837a 100644
--- a/ggml.c
+++ b/ggml.c
@@ -20916,7 +20916,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         gguf_fread_el(file, &magic, sizeof(magic), &offset);

         if (magic != GGUF_MAGIC) {
-            fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
+            if (magic == GGUF_WRONG_ENIAN_MAGIC)
+            {
+                fprintf(stderr, "Endianess of the GGUF file and platform do not match.%s: invalid magic number %08x.\n", __func__, magic);
+            }
+            else {
+                fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
+            }
             fclose(file);
             return NULL;
         }

diff --git a/ggml.h b/ggml.h
index 3eddc44b90fdd..fdd8e31bec70e 100644
--- a/ggml.h
+++ b/ggml.h
@@ -232,7 +232,8 @@
 #define GGML_EXIT_ABORTED 1

 #define GGUF_MAGIC 0x46554747 // "GGUF"
-#define GGUF_VERSION 2
+#define GGUF_WRONG_ENIAN_MAGIC 0x47475546
+#define GGUF_VERSION 3

 #define GGUF_DEFAULT_ALIGNMENT 32
diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index c24bbc1ba6400..748d59343c27b 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -19,7 +19,7 @@
 #
 GGUF_MAGIC = 0x46554747
-GGUF_VERSION = 2
+GGUF_VERSION = 3
 GGUF_DEFAULT_ALIGNMENT = 32
@@ -621,7 +621,8 @@ class GGUFWriter:
     temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
     tensors: list[tuple[np.ndarray[Any, Any], int]]

-    def get_pack_prefix(self):
+    @property
+    def pack_prefix(self):
         if self.endianess==GGUFEndian.LITTLE:
             return "<"
         else:
             return ">"
@@ -632,28 +633,29 @@ def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True
         self.arch = arch
         self.endianess = endianess
         self._simple_value_packing = {
-            GGUFValueType.UINT8: f"{self.get_pack_prefix()}B",
-            GGUFValueType.INT8: f"{self.get_pack_prefix()}b",
-            GGUFValueType.UINT16: f"{self.get_pack_prefix()}H",
-            GGUFValueType.INT16: f"{self.get_pack_prefix()}h",
-            GGUFValueType.UINT32: f"{self.get_pack_prefix()}I",
-            GGUFValueType.INT32: f"{self.get_pack_prefix()}i",
-            GGUFValueType.FLOAT32: f"{self.get_pack_prefix()}f",
-            GGUFValueType.UINT64: f"{self.get_pack_prefix()}Q",
-            GGUFValueType.INT64: f"{self.get_pack_prefix()}q",
-            GGUFValueType.FLOAT64: f"{self.get_pack_prefix()}d",
+            GGUFValueType.UINT8: f"{self.pack_prefix}B",
+            GGUFValueType.INT8: f"{self.pack_prefix}b",
+            GGUFValueType.UINT16: f"{self.pack_prefix}H",
+            GGUFValueType.INT16: f"{self.pack_prefix}h",
+            GGUFValueType.UINT32: f"{self.pack_prefix}I",
+            GGUFValueType.INT32: f"{self.pack_prefix}i",
+            GGUFValueType.FLOAT32: f"{self.pack_prefix}f",
+            GGUFValueType.UINT64: f"{self.pack_prefix}Q",
+            GGUFValueType.INT64: f"{self.pack_prefix}q",
+            GGUFValueType.FLOAT64: f"{self.pack_prefix}d",
             GGUFValueType.BOOL: "?" ,
         }
         self.add_architecture()
         self.use_temp_file = use_temp_file
         self.tensors = []
-        print(f"This gguf file is for {self.endianess} only")
+        endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian"
+        print(f"This gguf file is for {endianess_str} only")

     def write_header_to_file(self):
-        self.fout.write(struct.pack(f"{self.get_pack_prefix()}I", GGUF_MAGIC))
-        self.fout.write(struct.pack(f"{self.get_pack_prefix()}I", GGUF_VERSION))
-        self.fout.write(struct.pack(f"{self.get_pack_prefix()}Q", self.ti_data_count))
-        self.fout.write(struct.pack(f"{self.get_pack_prefix()}Q", self.kv_data_count))
+        self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_MAGIC))
+        self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_VERSION))
+        self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.ti_data_count))
+        self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.kv_data_count))
         self.flush()
         # print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
@@ -730,7 +732,7 @@ def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool
             vtype = GGUFValueType.get_type(val)

         if add_vtype:
-            self.kv_data += struct.pack(f"{self.get_pack_prefix()}I", vtype)
+            self.kv_data += struct.pack(f"{self.pack_prefix}I", vtype)
             self.kv_data_count += 1

         pack_fmt = self._simple_value_packing.get(vtype)
@@ -738,14 +740,14 @@ def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool
             self.kv_data += struct.pack(pack_fmt, val)
         elif vtype == GGUFValueType.STRING:
             encoded_val = val.encode("utf8") if isinstance(val, str) else val
-            self.kv_data += struct.pack(f"{self.get_pack_prefix()}Q", len(encoded_val))
+            self.kv_data += struct.pack(f"{self.pack_prefix}Q", len(encoded_val))
             self.kv_data += encoded_val
         elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
             ltype = GGUFValueType.get_type(val[0])
             if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                 raise ValueError("All items in a GGUF array should be of the same type")
-            self.kv_data += struct.pack(f"{self.get_pack_prefix()}I", ltype)
-            self.kv_data += struct.pack(f"{self.get_pack_prefix()}Q", len(val))
+            self.kv_data += struct.pack(f"{self.pack_prefix}I", ltype)
+            self.kv_data += struct.pack(f"{self.pack_prefix}Q", len(val))
             for item in val:
                 self.add_val(item, add_vtype=False)
         else:
@@ -759,18 +761,18 @@ def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype:
         assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"

         encoded_name = name.encode("utf8")
-        self.ti_data += struct.pack(f"{self.get_pack_prefix()}Q", len(encoded_name))
+        self.ti_data += struct.pack(f"{self.pack_prefix}Q", len(encoded_name))
         self.ti_data += encoded_name
         n_dims = len(tensor_shape)
-        self.ti_data += struct.pack(f"{self.get_pack_prefix()}I", n_dims)
+        self.ti_data += struct.pack(f"{self.pack_prefix}I", n_dims)
         for i in range(n_dims):
-            self.ti_data += struct.pack(f"{self.get_pack_prefix()}Q", tensor_shape[n_dims - 1 - i])
+            self.ti_data += struct.pack(f"{self.pack_prefix}Q", tensor_shape[n_dims - 1 - i])
         if raw_dtype is None:
             dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
         else:
             dtype = raw_dtype
-        self.ti_data += struct.pack(f"{self.get_pack_prefix()}I", dtype)
-        self.ti_data += struct.pack(f"{self.get_pack_prefix()}Q", self.offset_tensor)
+        self.ti_data += struct.pack(f"{self.pack_prefix}I", dtype)
+        self.ti_data += struct.pack(f"{self.pack_prefix}Q", self.offset_tensor)
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1

diff --git a/gguf-py/pyproject.toml b/gguf-py/pyproject.toml
index 07a7ab4dd11fc..f0741a7c23e03 100644
--- a/gguf-py/pyproject.toml
+++ b/gguf-py/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "gguf"
-version = "0.4.4"
+version = "0.4.5"
 description = "Write ML models in GGUF for GGML"
 authors = ["GGML "]
 packages = [
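The wrong-endian check added to ggml.c works because a byte-swapped reader sees the magic "GGUF" as 0x47475546 rather than 0x46554747. A standalone sketch of the same detection (Python for brevity; the constant is spelled GGUF_WRONG_ENIAN_MAGIC in the patch, and the read below is explicitly little-endian where the C code uses the host's native order):

    import struct

    GGUF_MAGIC              = 0x46554747  # b"GGUF" interpreted little-endian
    GGUF_WRONG_ENDIAN_MAGIC = 0x47475546  # the same bytes with swapped order

    def check_magic(header: bytes) -> str:
        # mirrors the check this commit adds to gguf_init_from_file()
        (magic,) = struct.unpack("<I", header[:4])
        if magic == GGUF_MAGIC:
            return "ok"
        if magic == GGUF_WRONG_ENDIAN_MAGIC:
            return "endianness of the GGUF file and platform do not match"
        return f"invalid magic number {magic:08x}"

    print(check_magic(b"GGUF"))  # ok
    print(check_magic(b"FUGG"))  # the wrong-endian case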
From e513abe37e5994873a6d3f8a1adcb1bcfc6a8b40 Mon Sep 17 00:00:00 2001
From: chenqiny
Date: Sun, 15 Oct 2023 23:59:53 +0800
Subject: [PATCH 7/8] always use "GGUF" as beginning of GGUF file

---
 ggml.c               |  8 +-------
 ggml.h               | 14 ++++++++++++--
 gguf-py/gguf/gguf.py |  2 +-
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/ggml.c b/ggml.c
index 04b88c98a837a..6d1776ca46741 100644
--- a/ggml.c
+++ b/ggml.c
@@ -20916,13 +20916,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         gguf_fread_el(file, &magic, sizeof(magic), &offset);

         if (magic != GGUF_MAGIC) {
-            if (magic == GGUF_WRONG_ENIAN_MAGIC)
-            {
-                fprintf(stderr, "Endianess of the GGUF file and platform do not match.%s: invalid magic number %08x.\n", __func__, magic);
-            }
-            else {
-                fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
-            }
+            fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
             fclose(file);
             return NULL;
         }

diff --git a/ggml.h b/ggml.h
index fdd8e31bec70e..10ae3c033bab9 100644
--- a/ggml.h
+++ b/ggml.h
@@ -231,8 +231,18 @@
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1

-#define GGUF_MAGIC 0x46554747 // "GGUF"
-#define GGUF_WRONG_ENIAN_MAGIC 0x47475546
+#if defined(__linux__)
+    #include <endian.h>
+    #if BYTE_ORDER == LITTLE_ENDIAN
+        #define GGUF_MAGIC 0x46554747
+    #elif BYTE_ORDER == BIG_ENDIAN
+        #define GGUF_MAGIC 0x47475546
+    #endif
+#else
+    // Use little endian magic uint_32 value
+    #define GGUF_MAGIC 0x46554747
+#endif
+
 #define GGUF_VERSION 3

 #define GGUF_DEFAULT_ALIGNMENT 32

diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 748d59343c27b..a5f92dd6df958 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -652,7 +652,7 @@ def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True
         print(f"This gguf file is for {endianess_str} only")

     def write_header_to_file(self):
-        self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_MAGIC))
+        self.fout.write(struct.pack("<I", GGUF_MAGIC))
         self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_VERSION))
         self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.ti_data_count))
         self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.kv_data_count))
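On Linux the revised ggml.h now picks GGUF_MAGIC from the host byte order via <endian.h>, so the file always begins with the literal characters "GGUF". A sketch of the same invariant (sys.byteorder plays the role of BYTE_ORDER here):

    import struct
    import sys

    # the value ggml.h would select on this host
    native_magic = 0x46554747 if sys.byteorder == "little" else 0x47475546

    # packed with native byte order ("="), both choices yield the same
    # four bytes on disk: the characters "GGUF"
    assert struct.pack("=I", native_magic) == b"GGUF"
    print("first four bytes:", struct.pack("=I", native_magic))

The gguf.py side achieves the same thing by always packing the magic with "<I", which emits the literal bytes b"GGUF" for the 0x46554747 constant regardless of the target endianness.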
From ... Mon Sep 17 00:00:00 2001
From: chenqiny
Date: Fri, 20 Oct 2023 18:45:19 +0800
Subject: [PATCH 8/8] Compare "GGUF" with file header char by char

1. Set GGUF_MAGIC to "GGUF" string instead of int value
2. Compare "GGUF" char by char to ensure its byte order
3. Move bytes swap code from convert.py to gguf.py write_tensor_data

---
 convert.py                      |  2 --
 .../convert-llama2c-to-ggml.cpp |  2 +-
 ggml.c                          | 19 +++++++++++--------
 ggml.h                          | 12 +-----------
 gguf-py/gguf/gguf.py            |  2 ++
 5 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/convert.py b/convert.py
index 59f6222ed5a68..24da25efcaca1 100755
--- a/convert.py
+++ b/convert.py
@@ -932,8 +932,6 @@ def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyM
             elapsed = time.time() - start
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
-            if endianess==gguf.GGUFEndian.BIG:
-                ndarray.byteswap(inplace=True)
             print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}")
             of.gguf.write_tensor_data(ndarray)

diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index c291f0adf20e1..cae3bf3c3dc65 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -536,7 +536,7 @@ static bool is_ggml_file(const char * filename) {
     if (file.size < 4) {
         return false;
     }
-    uint32_t magic = file.read_u32();
+    std::string magic = file.read_string(4);
     return magic == GGUF_MAGIC;
 }

diff --git a/ggml.c b/ggml.c
index 6d1776ca46741..a24933a1b9907 100644
--- a/ggml.c
+++ b/ggml.c
@@ -20813,7 +20813,7 @@ struct gguf_kv {
 };

 struct gguf_header {
-    uint32_t magic;
+    char magic[4];
     uint32_t version;
     uint64_t n_tensors; // GGUFv2
     uint64_t n_kv;      // GGUFv2
@@ -20883,7 +20883,7 @@ static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset)
 struct gguf_context * gguf_init_empty(void) {
     struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));

-    ctx->header.magic = GGUF_MAGIC;
+    memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic));
     ctx->header.version = GGUF_VERSION;
     ctx->header.n_tensors = 0;
     ctx->header.n_kv = 0;
@@ -20909,16 +20909,18 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     // offset from start of file
     size_t offset = 0;

-    uint32_t magic = 0;
+    char magic[4];

     // check the magic before making allocations
     {
         gguf_fread_el(file, &magic, sizeof(magic), &offset);

-        if (magic != GGUF_MAGIC) {
-            fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
-            fclose(file);
-            return NULL;
+        for (uint32_t i = 0; i < sizeof(magic); i++) {
+            if (magic[i] != GGUF_MAGIC[i]) {
+                fprintf(stderr, "%s: invalid magic characters %s.\n", __func__, magic);
+                fclose(file);
+                return NULL;
+            }
         }
     }
@@ -20928,7 +20930,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

     // read the header
     {
-        ctx->header.magic = magic;
+        strncpy(ctx->header.magic, magic, 4);
+
         ctx->kv    = NULL;
         ctx->infos = NULL;
diff --git a/ggml.h b/ggml.h
index 10ae3c033bab9..c748fea941cff 100644
--- a/ggml.h
+++ b/ggml.h
@@ -231,17 +231,7 @@
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1

-#if defined(__linux__)
-    #include <endian.h>
-    #if BYTE_ORDER == LITTLE_ENDIAN
-        #define GGUF_MAGIC 0x46554747
-    #elif BYTE_ORDER == BIG_ENDIAN
-        #define GGUF_MAGIC 0x47475546
-    #endif
-#else
-    // Use little endian magic uint_32 value
-    #define GGUF_MAGIC 0x46554747
-#endif
+#define GGUF_MAGIC "GGUF"

 #define GGUF_VERSION 3

 #define GGUF_DEFAULT_ALIGNMENT 32

diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index a5f92dd6df958..16e7359792632 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -804,6 +804,8 @@ def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
             fp.write(bytes([0] * pad))

     def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
+        if self.endianess==GGUFEndian.BIG:
+            tensor.byteswap(inplace=True)
         self.write_padding(self.fout, self.fout.tell())
         tensor.tofile(self.fout)
         self.write_padding(self.fout, tensor.nbytes)
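The end state of the series is easy to restate: the magic is the four characters "GGUF" compared byte by byte, which is endian-independent by construction, and all byte swapping for big-endian targets now happens in exactly one place, GGUFWriter.write_tensor_data. A minimal reader-side sketch of the final check:

    GGUF_MAGIC = b"GGUF"  # a byte string now, not an integer

    def is_gguf(path: str) -> bool:
        # byte-by-byte comparison, as in the final gguf_init_from_file();
        # the result is the same on x86 and s390x because no integer
        # interpretation of the header is involved
        with open(path, "rb") as f:
            return f.read(4) == GGUF_MAGIC

Moving the swap into write_tensor_data keeps convert.py oblivious to endianness and guarantees each tensor is swapped exactly once, immediately before it is written to the file.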