
Commit d1940a3

update format
jameswu2014 committed Sep 4, 2023
1 parent bd72ba0 commit d1940a3
Showing 2 changed files with 7 additions and 27 deletions.
convert-baichuan-hf-to-gguf.py: 19 changes (3 additions, 16 deletions)
@@ -120,6 +120,8 @@ def parse_args() -> argparse.Namespace:
     ctx_length = hparams["max_sequence_length"]
 elif "max_position_embeddings" in hparams:
     ctx_length = hparams["max_position_embeddings"]
+elif "model_max_length" in hparams:
+    ctx_length = hparams["model_max_length"]
 else:
     print("gguf: can not find ctx length parameter.")

@@ -231,12 +233,7 @@ def parse_args() -> argparse.Namespace:

 tmp=model_part
 for i in itertools.count():
-    if f"model.layers.{i}.self_attn.q_proj.weight" in model_part:
-        print(f"Permuting layer {i}")
-        tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = reverse_hf_permute(model_part[f"model.layers.{i}.self_attn.q_proj.weight"], head_count, head_count)
-        tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = reverse_hf_permute(model_part[f"model.layers.{i}.self_attn.k_proj.weight"], head_count, head_count_kv)
-        #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
-    elif f"model.layers.{i}.self_attn.W_pack.weight" in model_part:
+    if f"model.layers.{i}.self_attn.W_pack.weight" in model_part:
         print(f"Unpacking and permuting layer {i}")
         tmp[f"model.layers.{i}.self_attn.q_proj.weight"]=reverse_hf_permute_part(model_part[f"model.layers.{i}.self_attn.W_pack.weight"],0,head_count,head_count)
         tmp[f"model.layers.{i}.self_attn.k_proj.weight"]=reverse_hf_permute_part(model_part[f"model.layers.{i}.self_attn.W_pack.weight"],1,head_count,head_count_kv)
@@ -259,14 +256,6 @@ def parse_args() -> argparse.Namespace:

     data = data.squeeze().numpy()
 
-    # reverse permute these
-    # if name.endswith(".q_proj.weight"):
-    #     data = reverse_hf_permute(data, head_count)
-    # if name.endswith(".k_proj.weight"):
-    #     data = reverse_hf_permute(data, head_count, head_count_kv)
-
-
-
     # map tensor names
     new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
     if new_name is None:
@@ -289,8 +278,6 @@ def parse_args() -> argparse.Namespace:
     data = data.astype(np.float16)
 
     print(name + " -> " + new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
-
-
     gguf_writer.add_tensor(new_name, data)


llama.cpp: 15 changes (4 additions, 11 deletions)
@@ -1948,7 +1948,6 @@ static void llm_load_tensors(
     const int64_t n_vocab = hparams.n_vocab;
 
     const auto tn = LLM_TN(model.arch);
-
     switch (model.arch) {
         case LLM_ARCH_LLAMA:
             {
@@ -2777,13 +2776,11 @@ static struct ggml_cgraph * llm_build_baichaun(

         struct ggml_tensor * Kcur;
         struct ggml_tensor * Qcur;
-        switch (model.type)
-        {
+        switch (model.type) {
             case MODEL_7B:
                 Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
-                Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
+                Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
                 break;
-
             case MODEL_13B:
                 Kcur = ggml_reshape_3d(ctx0, tmpk, n_embd/n_head, n_head, N);
                 Qcur = ggml_reshape_3d(ctx0, tmpq, n_embd/n_head, n_head, N);
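
The switch reflects the two position encodings in the Baichuan family: the 7B graph applies rotary embeddings (RoPE) to Q and K at this point, while the 13B graph only reshapes them and instead injects position information later via an ALiBi bias on the attention scores. For intuition only, a NumPy sketch of the pairwise rotation that ggml_rope_custom_inplace performs in mode 0 (a model of the math, not the ggml implementation):

```python
import numpy as np

def rope(x: np.ndarray, pos: int, freq_base: float = 10000.0, freq_scale: float = 1.0) -> np.ndarray:
    # Rotate each pair (x[2i], x[2i+1]) by theta_i = freq_scale * pos * freq_base**(-2i/d).
    d = x.shape[-1]
    theta = (freq_scale * pos) * freq_base ** (-np.arange(0, d, 2) / d)
    out = x.copy()
    out[..., 0::2] = x[..., 0::2] * np.cos(theta) - x[..., 1::2] * np.sin(theta)
    out[..., 1::2] = x[..., 0::2] * np.sin(theta) + x[..., 1::2] * np.cos(theta)
    return out

# Rotations preserve vector norms, which is easy to sanity-check:
q = np.random.randn(32, 128)  # (n_head, n_embd_head)
assert np.allclose(np.linalg.norm(rope(q, pos=5)), np.linalg.norm(q))
```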
@@ -2797,8 +2794,6 @@ static struct ggml_cgraph * llm_build_baichaun(

         offload_func_kq(Qcur);
         ggml_set_name(Qcur, "Qcur");
-
-
 
         // store key and value to memory
         {
@@ -2853,13 +2848,11 @@ static struct ggml_cgraph * llm_build_baichaun(

         struct ggml_tensor * KQ_masked;
         struct ggml_tensor * KQ_scaled_alibi;
-        // if model.type == MODEL_13B,here add kq_scaled_alibi
-        switch (model.type)
-        {
+        switch (model.type) {
             case MODEL_7B:
                 KQ_masked = ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past);
                 break;
-
             case MODEL_13B:
                 KQ_scaled_alibi =ggml_alibi(ctx0, KQ_scaled, n_past, n_head, 8);
                 ggml_set_name(KQ_scaled_alibi, "KQ_scaled_alibi");
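
Here the 13B path adds the positional signal as a bias on the scaled scores: ggml_alibi(ctx0, KQ_scaled, n_past, n_head, 8) with a maximum bias of 8, after which the usual causal mask and softmax follow. A sketch of the standard ALiBi slope construction that this parameterization corresponds to (the exact ggml indexing is an assumption, not copied from the source):

```python
import numpy as np

def alibi_bias(n_head: int, n_past: int, n_seq: int, max_bias: float = 8.0) -> np.ndarray:
    # Per-head slopes form a geometric sequence; n_head is first rounded
    # down to a power of two, and any remaining heads get interpolated slopes.
    n = 2 ** int(np.floor(np.log2(n_head)))
    m0 = 2.0 ** (-max_bias / n)
    slopes = m0 ** np.arange(1, n + 1)
    if n < n_head:
        m1 = 2.0 ** (-max_bias / (2 * n))
        slopes = np.concatenate([slopes, m1 ** np.arange(1, 2 * (n_head - n), 2)])
    # The bias grows linearly with key position: shape (n_head, 1, n_kv).
    pos = np.arange(n_past + n_seq)
    return slopes[:, None, None] * pos[None, None, :]

bias = alibi_bias(n_head=40, n_past=0, n_seq=16)  # Baichuan-13B uses 40 heads
print(bias.shape)  # (40, 1, 16)
```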
