From eb5b8327f688d1815f61786020468ccd8dfc53f7 Mon Sep 17 00:00:00 2001
From: chenqiny
Date: Fri, 20 Oct 2023 18:45:19 +0800
Subject: [PATCH] Compare "GGUF" with file header char by char

1. Set GGUF_MAGIC to the string "GGUF" instead of an int value
2. Compare "GGUF" with the file header char by char so the check is
   independent of host byte order (see the sketch after the patch)
3. Move the byte-swap code from convert.py to gguf.py write_tensor_data
---
 convert.py                                    |  2 --
 .../convert-llama2c-to-ggml.cpp               |  2 +-
 ggml.c                                        | 19 +++++++++++--------
 ggml.h                                        | 12 +-----------
 gguf-py/gguf/gguf.py                          |  2 ++
 5 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/convert.py b/convert.py
index 59f6222ed5a68..24da25efcaca1 100755
--- a/convert.py
+++ b/convert.py
@@ -932,8 +932,6 @@ def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyM
         elapsed = time.time() - start
         size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
         padi = len(str(len(model)))
-        if endianess==gguf.GGUFEndian.BIG:
-            ndarray.byteswap(inplace=True)
         print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}")
         of.gguf.write_tensor_data(ndarray)
 
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index c291f0adf20e1..cae3bf3c3dc65 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -536,7 +536,7 @@ static bool is_ggml_file(const char * filename) {
     llama_file file(filename, "rb");
     if (file.size < 4) {
         return false;
     }
-    uint32_t magic = file.read_u32();
+    std::string magic = file.read_string(4);
     return magic == GGUF_MAGIC;
 }
diff --git a/ggml.c b/ggml.c
index 6d1776ca46741..a24933a1b9907 100644
--- a/ggml.c
+++ b/ggml.c
@@ -20813,7 +20813,7 @@ struct gguf_kv {
 };
 
 struct gguf_header {
-    uint32_t magic;
+    char magic[4];
     uint32_t version;
     uint64_t n_tensors; // GGUFv2
     uint64_t n_kv;      // GGUFv2
@@ -20883,7 +20883,7 @@ static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset)
 
 struct gguf_context * gguf_init_empty(void) {
     struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
-    ctx->header.magic = GGUF_MAGIC;
+    memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic));
     ctx->header.version = GGUF_VERSION;
     ctx->header.n_tensors = 0;
     ctx->header.n_kv = 0;
@@ -20909,16 +20909,18 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     // offset from start of file
     size_t offset = 0;
 
-    uint32_t magic = 0;
+    char magic[4];
 
     // check the magic before making allocations
     {
         gguf_fread_el(file, &magic, sizeof(magic), &offset);
 
-        if (magic != GGUF_MAGIC) {
-            fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
-            fclose(file);
-            return NULL;
+        for (uint32_t i = 0; i < sizeof(magic); i++) {
+            if (magic[i] != GGUF_MAGIC[i]) {
+                fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
+                fclose(file);
+                return NULL;
+            }
         }
     }
 
@@ -20928,7 +20930,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
 
     // read the header
     {
-        ctx->header.magic = magic;
+        memcpy(ctx->header.magic, magic, sizeof(magic));
+
         ctx->kv    = NULL;
         ctx->infos = NULL;
 
diff --git a/ggml.h b/ggml.h
index 10ae3c033bab9..c748fea941cff 100644
--- a/ggml.h
+++ b/ggml.h
@@ -231,17 +231,7 @@
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1
 
-#if defined(__linux__)
-    #include <endian.h>
-    #if BYTE_ORDER == LITTLE_ENDIAN
-        #define GGUF_MAGIC 0x46554747
-    #elif BYTE_ORDER == BIG_ENDIAN
-        #define GGUF_MAGIC 0x47475546
-    #endif
-#else
-    // Use little endian magic uint_32 value
-    #define GGUF_MAGIC 0x46554747
-#endif
+#define GGUF_MAGIC "GGUF"
 
 #define GGUF_VERSION 3
 
diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index a5f92dd6df958..16e7359792632 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -804,6 +804,8 @@ def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
             fp.write(bytes([0] * pad))
 
     def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
+        if self.endianess == GGUFEndian.BIG:
+            tensor.byteswap(inplace=True)
         self.write_padding(self.fout, self.fout.tell())
         tensor.tofile(self.fout)
         self.write_padding(self.fout, tensor.nbytes)
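
Note: a minimal standalone sketch of why the char-by-char comparison is
byte-order independent. The check_magic() helper below is hypothetical
(it is not part of the patch) and mirrors the patched gguf_init_from_file()
logic: it reads the first four bytes of a file and compares them one by one
against the GGUF_MAGIC string, so it inspects the same bytes on little- and
big-endian hosts, whereas the old read_u32() == 0x46554747 comparison only
matched on little-endian machines.

#include <stdio.h>

#define GGUF_MAGIC "GGUF"

// Hypothetical helper, for illustration only: returns 1 when the file
// starts with the bytes 'G' 'G' 'U' 'F'.
static int check_magic(FILE * file) {
    char magic[4];
    if (fread(magic, 1, sizeof(magic), file) != sizeof(magic)) {
        return 0; // file shorter than the magic
    }
    for (size_t i = 0; i < sizeof(magic); i++) {
        if (magic[i] != GGUF_MAGIC[i]) {
            return 0; // byte i differs, regardless of host endianness
        }
    }
    return 1;
}

int main(int argc, char ** argv) {
    FILE * file = fopen(argc > 1 ? argv[1] : "model.gguf", "rb");
    if (file == NULL) {
        perror("fopen");
        return 1;
    }
    printf("GGUF magic %s\n", check_magic(file) ? "matches" : "does not match");
    fclose(file);
    return 0;
}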