[llama3.2] refine docs
sgwzy22 committed Nov 7, 2024
1 parent 76e73be commit da71e3c
Showing 2 changed files with 5 additions and 20 deletions.
10 changes: 5 additions & 5 deletions models/Llama3_2-Vision/README.md
@@ -62,9 +62,9 @@ source ./envsetup.sh

``` shell
pip install -r requirements.txt
cp ./compile/files/Llama-3.2-11B-Vision-Instruct/modeling_llama.py /usr/local/lib/python3.10/dist-packages/transformers/models/mllama/modeling_llama.py
cp ./compile/files/Llama-3.2-11B-Vision-Instruct/modeling_mllama.py /usr/local/lib/python3.10/dist-packages/transformers/models/mllama/modeling_mllama.py
```
* PS: The path is not necessarily /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py; run `pip show transformers` to check the actual install location before replacing the file
* PS: The path is not necessarily /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_mllama.py; run `pip show transformers` to check the actual install location before replacing the file (see the snippet below)
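
For example, a quick way to confirm where `transformers` is actually installed before copying the file over (a minimal check, not part of the original steps):

``` shell
pip show transformers | grep Location
# or resolve the package directory directly from Python
python3 -c "import transformers, os; print(os.path.dirname(transformers.__file__))"
```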

### Step 5: Generate the ONNX files

@@ -127,7 +127,7 @@ python3 pipeline.py --help
The main changes convert `.shape` and similar operations to static shapes, so that constant folding does not fail on the overly large model and leave dynamic subgraphs behind.
Some ScatterND operations are likewise rewritten with Concat, for the same reason; a minimal sketch of the pattern is shown below.
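
As an illustration of this kind of rewrite (a sketch of the pattern only, assuming a fixed `HEAD_DIM`; it is not the exact code in `modeling_mllama.py`):

```python
import torch

HEAD_DIM = 128  # assumed to be a fixed export-time constant

# Dynamic variant: the split point comes from a runtime .shape call, which the
# ONNX exporter traces into Shape/Gather/Slice nodes that may fail to constant-fold.
def rotate_half_dynamic(x):
    half = x.shape[-1] // 2
    return torch.cat((-x[..., half:], x[..., :half]), dim=-1)

# Static variant: the split point is a plain Python int known at export time,
# so only fixed-size Slice and Concat nodes end up in the graph.
def rotate_half_static(x):
    half = HEAD_DIM // 2
    return torch.cat((-x[..., half:], x[..., :half]), dim=-1)

# Similarly, a KV cache can be updated by appending with Concat rather than
# writing into a preallocated buffer with ScatterND, keeping the subgraph static.
def update_cache_concat(past_k, new_k):
    return torch.cat((past_k, new_k), dim=2)
```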

- The following code in `modeling_llama.py`:
- The following code in `modeling_mllama.py`:

```python
def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
@@ -151,7 +151,7 @@ python3 pipeline.py --help
return q_embed, k_embed
```

- The following code in `modeling_llama.py`:
- The following code in `modeling_mllama.py`:

```python
def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
@@ -171,7 +171,7 @@ python3 pipeline.py --help
return hidden_states.reshape(batch, slen, num_key_value_heads * n_rep, head_dim).transpose(1, 2)
```
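
A complete version of this rewrite might look roughly as follows (a hedged reconstruction built around the visible `return` line; the `(batch, seq_len, num_key_value_heads, head_dim)` input layout is an assumption):

```python
import torch

def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    # Assumed layout: (batch, seq_len, num_key_value_heads, head_dim).
    batch, slen, num_key_value_heads, head_dim = hidden_states.shape
    if n_rep == 1:
        return hidden_states.transpose(1, 2)
    # Repeat each KV head n_rep times via expand/reshape instead of
    # data-dependent indexing, then move heads ahead of the sequence dim.
    hidden_states = hidden_states[:, :, :, None, :].expand(
        batch, slen, num_key_value_heads, n_rep, head_dim)
    return hidden_states.reshape(batch, slen, num_key_value_heads * n_rep, head_dim).transpose(1, 2)
```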

- The following code in `modeling_llama.py`:
- The following code in `modeling_mllama.py`:

```python
class LlamaAttention(nn.Module):
```
15 changes: 0 additions & 15 deletions models/Llama3_2-Vision/compile/export_onnx.py
@@ -10,7 +10,6 @@

import os
import torch
import numpy
import argparse
import transformers
from tqdm import tqdm
@@ -438,37 +437,23 @@ def test_net_with_mask(image_path):
        block_kvs.append(CrossBlockCache(i))

    # inference
    numpy.savez('verify/vit_in.npz',input_0=inputs['pixel_values'].squeeze(0).squeeze(0), input_1=inputs['aspect_ratio_ids'], input_2=inputs['aspect_ratio_mask'].squeeze(0))
    vit_out = vit(inputs['pixel_values'].squeeze(0).squeeze(0),
                  inputs['aspect_ratio_ids'],
                  inputs['aspect_ratio_mask'].squeeze(0))
    numpy.savez('verify/vit_out.npz',output_0=vit_out)
    numpy.savez('verify/embed_in.npz',input_0=input_ids)
    out = embed(input_ids).view(1, SEQ_LENGTH, HIDDEN_SIZE)
    numpy.savez('verify/embed_ref.npz',output_0=out)
    k_cache = []
    v_cache = []
    for i in range(NUM_LAYERS):
        if i not in CROSS_ATTN_LAYERS:
            if i == 2:
                numpy.savez('verify/block_in.npz',input_0=out,input_1=position_ids,input_2=attention_mask)
            out, k, v = blocks[i](out, position_ids, attention_mask)
            if i == 2:
                numpy.savez('verify/block_ref.npz',output_0=out,output_1=k,output_2=v)
            k_cache.append(k)
            v_cache.append(v)
        else:
            if i == 3:
                numpy.savez('verify/cblock_in.npz',input_0=out,input_1=vit_out,input_2=text_row_mask,input_3=cross_attn_mask)
            out, k, v = blocks[i](out, vit_out, text_row_mask, cross_attn_mask)
            if i == 3:
                numpy.savez('verify/cblock_ref.npz',output_0=out,output_1=k,output_2=v)
            k_cache.append(k)
            v_cache.append(v)
    out = out[:, token_len - 1: token_len].view(1, 1, HIDDEN_SIZE)
    numpy.savez('verify/head_in.npz',input_0=out)
    token = greedy_head(lm_head(out)).view(1)
    numpy.savez('verify/head_ref.npz',output_0=token)
    out_ids = [int(token)]
    word = processor.decode([int(token)])
    print(word, end="")
