
Commit d1940a3

update format
jameswu2014 committed Sep 4, 2023
1 parent bd72ba0 commit d1940a3
Showing 2 changed files with 7 additions and 27 deletions.
convert-baichuan-hf-to-gguf.py: 19 changes (3 additions, 16 deletions)
@@ -120,6 +120,8 @@ def parse_args() -> argparse.Namespace:
     ctx_length = hparams["max_sequence_length"]
 elif "max_position_embeddings" in hparams:
     ctx_length = hparams["max_position_embeddings"]
+elif "model_max_length" in hparams:
+    ctx_length = hparams["model_max_length"]
 else:
     print("gguf: can not find ctx length parameter.")

@@ -231,12 +233,7 @@ def parse_args() -> argparse.Namespace:

 tmp=model_part
 for i in itertools.count():
-    if f"model.layers.{i}.self_attn.q_proj.weight" in model_part:
-        print(f"Permuting layer {i}")
-        tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = reverse_hf_permute(model_part[f"model.layers.{i}.self_attn.q_proj.weight"], head_count, head_count)
-        tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = reverse_hf_permute(model_part[f"model.layers.{i}.self_attn.k_proj.weight"], head_count, head_count_kv)
-        #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
-    elif f"model.layers.{i}.self_attn.W_pack.weight" in model_part:
+    if f"model.layers.{i}.self_attn.W_pack.weight" in model_part:
         print(f"Unpacking and permuting layer {i}")
         tmp[f"model.layers.{i}.self_attn.q_proj.weight"]=reverse_hf_permute_part(model_part[f"model.layers.{i}.self_attn.W_pack.weight"],0,head_count,head_count)
         tmp[f"model.layers.{i}.self_attn.k_proj.weight"]=reverse_hf_permute_part(model_part[f"model.layers.{i}.self_attn.W_pack.weight"],1,head_count,head_count_kv)
@@ -259,14 +256,6 @@ def parse_args() -> argparse.Namespace:

     data = data.squeeze().numpy()
 
-    # reverse permute these
-    # if name.endswith(".q_proj.weight"):
-    #     data = reverse_hf_permute(data, head_count)
-    # if name.endswith(".k_proj.weight"):
-    #     data = reverse_hf_permute(data, head_count, head_count_kv)
-
-
-
     # map tensor names
     new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
     if new_name is None:
@@ -289,8 +278,6 @@ def parse_args() -> argparse.Namespace:
     data = data.astype(np.float16)
 
     print(name + " -> " + new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
-
-
     gguf_writer.add_tensor(new_name, data)


llama.cpp: 15 changes (4 additions, 11 deletions)
@@ -1948,7 +1948,6 @@ static void llm_load_tensors(
     const int64_t n_vocab = hparams.n_vocab;
 
     const auto tn = LLM_TN(model.arch);
-
     switch (model.arch) {
         case LLM_ARCH_LLAMA:
             {
@@ -2777,13 +2776,11 @@ static struct ggml_cgraph * llm_build_baichaun(

         struct ggml_tensor * Kcur;
         struct ggml_tensor * Qcur;
-        switch (model.type)
-        {
+        switch (model.type) {
             case MODEL_7B:
                 Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
-                Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
+                Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
                 break;
-
             case MODEL_13B:
                 Kcur = ggml_reshape_3d(ctx0, tmpk, n_embd/n_head, n_head, N);
                 Qcur = ggml_reshape_3d(ctx0, tmpq, n_embd/n_head, n_head, N);
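
The switch reflects the two position encodings in the Baichuan family: the 7B graph applies rotary embeddings (RoPE) to Q and K at this point, while the 13B graph only reshapes them and instead injects position information later via an ALiBi bias on the attention scores. For intuition only, a NumPy sketch of the pairwise rotation that ggml_rope_custom_inplace performs in mode 0 (a model of the math, not the ggml implementation):

```python
import numpy as np

def rope(x: np.ndarray, pos: int, freq_base: float = 10000.0, freq_scale: float = 1.0) -> np.ndarray:
    # Rotate each pair (x[2i], x[2i+1]) by theta_i = freq_scale * pos * freq_base**(-2i/d).
    d = x.shape[-1]
    theta = (freq_scale * pos) * freq_base ** (-np.arange(0, d, 2) / d)
    out = x.copy()
    out[..., 0::2] = x[..., 0::2] * np.cos(theta) - x[..., 1::2] * np.sin(theta)
    out[..., 1::2] = x[..., 0::2] * np.sin(theta) + x[..., 1::2] * np.cos(theta)
    return out

# Rotations preserve vector norms, which is easy to sanity-check:
q = np.random.randn(32, 128)  # (n_head, n_embd_head)
assert np.allclose(np.linalg.norm(rope(q, pos=5)), np.linalg.norm(q))
```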
@@ -2797,8 +2794,6 @@ static struct ggml_cgraph * llm_build_baichaun(

         offload_func_kq(Qcur);
         ggml_set_name(Qcur, "Qcur");
-
-
 
         // store key and value to memory
         {
@@ -2853,13 +2848,11 @@ static struct ggml_cgraph * llm_build_baichaun(

         struct ggml_tensor * KQ_masked;
         struct ggml_tensor * KQ_scaled_alibi;
-        // if model.type == MODEL_13B,here add kq_scaled_alibi
-        switch (model.type)
-        {
+        switch (model.type) {
             case MODEL_7B:
                 KQ_masked = ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past);
                 break;
-
             case MODEL_13B:
                 KQ_scaled_alibi =ggml_alibi(ctx0, KQ_scaled, n_past, n_head, 8);
                 ggml_set_name(KQ_scaled_alibi, "KQ_scaled_alibi");
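
Here the 13B path adds the positional signal as a bias on the scaled scores: ggml_alibi(ctx0, KQ_scaled, n_past, n_head, 8) with a maximum bias of 8, after which the usual causal mask and softmax follow. A sketch of the standard ALiBi slope construction that this parameterization corresponds to (the exact ggml indexing is an assumption, not copied from the source):

```python
import numpy as np

def alibi_bias(n_head: int, n_past: int, n_seq: int, max_bias: float = 8.0) -> np.ndarray:
    # Per-head slopes form a geometric sequence; n_head is first rounded
    # down to a power of two, and any remaining heads get interpolated slopes.
    n = 2 ** int(np.floor(np.log2(n_head)))
    m0 = 2.0 ** (-max_bias / n)
    slopes = m0 ** np.arange(1, n + 1)
    if n < n_head:
        m1 = 2.0 ** (-max_bias / (2 * n))
        slopes = np.concatenate([slopes, m1 ** np.arange(1, 2 * (n_head - n), 2)])
    # The bias grows linearly with key position: shape (n_head, 1, n_kv).
    pos = np.arange(n_past + n_seq)
    return slopes[:, None, None] * pos[None, None, :]

bias = alibi_bias(n_head=40, n_past=0, n_seq=16)  # Baichuan-13B uses 40 heads
print(bias.shape)  # (40, 1, 16)
```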
