Opt 66b (#19)
* autoloader for opt
* opt-66b inference
* Update train.py
* Load data from example dir
* add readme of multi GPU inference

Co-authored-by: Zac Liu <liuguang@baai.ac.cn>
920232796 and marscrazy authored Jul 6, 2022
1 parent 3e52907 commit 3a0c8cb
Showing 14 changed files with 430 additions and 248 deletions.
12 changes: 8 additions & 4 deletions examples/glm_title_generation/train.py
@@ -27,12 +27,16 @@
num_checkpoints=1,
)

cur_dir = os.path.dirname(os.path.abspath(__file__))
src_dir = cur_dir + '/data/train.src'
tgt_dir = cur_dir + '/data/train.tgt'
# cur_dir = os.path.dirname(os.path.abspath(__file__))
# src_dir = cur_dir + '/data/train.src'
# tgt_dir = cur_dir + '/data/train.tgt'

src_dir = "./data/train.src"
tgt_dir = "./data/train.tgt"


maxlen = 256
auto_loader = AutoLoader("seq2seq",
auto_loader = AutoLoader("lm",
model_name="GLM-large-ch",
model_dir="./state_dict/")
model = auto_loader.get_model()
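Since the training data paths are now relative (`./data/train.src`), the script presumably expects to be launched from inside the example directory. A plausible invocation, assuming the standard FlagAI repository layout:

```commandline
cd examples/glm_title_generation
python train.py
```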
98 changes: 97 additions & 1 deletion examples/opt/README.md
@@ -52,4 +52,100 @@ out = predictor.predict_generate_randomsample(text,
repetition_penalty=3.0)

print(f"input is {text} \n out is {out}")
```

# Multi-GPU inference
## OPT-30b

To run inference on multiple GPUs with model parallelism, we use torch DDP and the Megatron-LM library.
### Basic steps
1. Set the model-parallel parameters, such as ```model_parallel_size``` and ```world_size```
2. Initialize torch DDP
3. Initialize Megatron-LM model parallelism
4. Set the random seed
5. Initialize the model and tokenizer
6. Run prediction
### Code
```python
import torch
import os
import argparse
from flagai import mpu
from flagai.auto_model.auto_loader import AutoLoader
import random
import numpy as np
from flagai.model.predictor.predictor import Predictor

# run script : python -m torch.distributed.launch --nproc_per_node=4 --nnodes=1 opt_30b_en_mutigpu.py
os.environ["ENV_TYPE"] = "deepspeed+mpu"
model_parallel_size = 4
world_size = 4

os.environ["MODEL_PARALLEL_SIZE"] = str(model_parallel_size)
os.environ["WORLD_SIZE"] = str(world_size)

def set_random_seed(seed):
    """Set random seed for reproducibility."""
    if seed is not None and seed > 0:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        mpu.model_parallel_cuda_manual_seed(seed)

parser = argparse.ArgumentParser()
parser.add_argument('--local_rank',
type=int,
default=0,
help="local_rank")

ds_args = parser.parse_args()
local_rank = ds_args.local_rank

master_addr = os.environ.get('MASTER_ADDR', '127.0.0.1')
master_port = os.environ.get('MASTER_PORT', '17501')

device = torch.device("cuda", local_rank)

def initialize_distributed():
    """Initialize torch.distributed."""
    torch.backends.cudnn.enabled = False
    # Manually set the device ids.
    torch.cuda.set_device(device)
    # Build the TCP init method and call the init process.
    init_method = 'tcp://' + master_addr + ':' + master_port
    torch.distributed.init_process_group(
        backend='nccl',  # or 'gloo'
        world_size=world_size,
        rank=local_rank,
        init_method=init_method)
    mpu.initialize_model_parallel(model_parallel_size)

initialize_distributed()

set_random_seed(123)

print("building model...")
loader = AutoLoader("lm", model_name="opt-30b-en")
model = loader.get_model()
tokenizer = loader.get_tokenizer()
model.half()

model.parallel_output = False
model.eval()
model.to(device)

torch.distributed.barrier(group=mpu.get_model_parallel_group())

text = """I think The Old Man and the Sea is a very good book, what do you think? I think """

predictor = Predictor(model, tokenizer)
out = predictor.predict_generate_randomsample(text)
if mpu.get_model_parallel_rank() == 0:
    print(f"pred is {out}")
```
### Run script
```commandline
python -m torch.distributed.launch --nproc_per_node=4 --nnodes=1 opt_30b_en_mutigpu.py
```
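Note that `--nproc_per_node=4` matches the `world_size` and `model_parallel_size` of 4 set in the script, so each model-parallel rank is pinned to its own GPU.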
22 changes: 22 additions & 0 deletions examples/opt/generate_opt_66b.py
@@ -0,0 +1,22 @@
from flagai.model.predictor.predictor import Predictor
from flagai.auto_model.auto_loader import AutoLoader
import torch

loader = AutoLoader(task_name="lm",
model_name="opt-66b-en")

model = loader.get_model()
tokenizer = loader.get_tokenizer()
model.eval()

text = """I think The Old Man and the Sea is a very good book, what do you think? Thank you for your question, I think """

predictor = Predictor(model, tokenizer)
out = predictor.predict_generate_randomsample(text,
input_max_length=100,
out_max_length=300,
top_k=50,
top_p=0.9,
repetition_penalty=3.0)

print(f"input is {text} \n out is {out}")
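Running OPT-66B in a single process like this assumes enough memory for the full checkpoint; as in the multi-GPU scripts, calling `model.half()` before inference would roughly halve that footprint, and the model-parallel script below (`opt_66b_en_mutigpu.py`) shards the model across eight GPUs instead.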
3 changes: 0 additions & 3 deletions examples/opt/opt_30b_en_mutigpu.py
@@ -10,8 +10,6 @@
import glob
import time

# run script : python -m torch.distributed.launch --nproc_per_node=4 --nnodes=1 opt_30b_en_mutigpu.py

os.environ["ENV_TYPE"] = "deepspeed+mpu"
model_parallel_size = 4
world_size = 4
@@ -61,7 +59,6 @@ def initialize_distributed():

set_random_seed(123)


print(f"building model...")
loader = AutoLoader("lm", model_name="opt-30b-en")
model = loader.get_model()
108 changes: 108 additions & 0 deletions examples/opt/opt_66b_en_mutigpu.py
@@ -0,0 +1,108 @@
# os.environ["CUDA_VISIBLE_DEVICES"] = "0,2"
import torch
import os
import time
os.environ["ENV_TYPE"] = "deepspeed+mpu"
os.environ["MODEL_PARALLEL_SIZE"] = '8'
os.environ["WORLD_SIZE"] = '8'
import argparse
from flagai import mpu
import random
import numpy as np
from flagai.model.predictor.predictor import Predictor
from flagai.model.opt_model import OPTModel
from flagai.data.tokenizer import OPTTokenizer

# Simple file-based coordination: 'current_rank' tracks whose turn it is to
# load a checkpoint shard, 'current_pool' tracks how many ranks may load at once.
def get_current_rank():
    with open('current_rank', 'r', encoding='utf8') as infile:
        line = infile.readline().strip()
        return int(line)

def set_current_rank(rank):
    with open('current_rank', 'w', encoding='utf8') as outfile:
        outfile.write(str(rank))

def get_current_pool():
    with open('current_pool', 'r', encoding='utf8') as infile:
        line = infile.readline().strip()
        return int(line)

def set_current_pool(rank):
    with open('current_pool', 'w', encoding='utf8') as outfile:
        outfile.write(str(rank))

# run script : python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 opt_66b_en_mutigpu.py
parser = argparse.ArgumentParser()
parser.add_argument('--local_rank',
type=int,
default=0,
help="local_rank")

def set_random_seed(seed):
    """Set random seed for reproducibility."""
    if seed is not None and seed > 0:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        mpu.model_parallel_cuda_manual_seed(seed)

ds_args = parser.parse_args()
local_rank = ds_args.local_rank

master_addr = os.environ.get('MASTER_ADDR', '127.0.0.1')
master_port = os.environ.get('MASTER_PORT', '17501')

device = torch.device("cuda", local_rank)
model_parallel_size = 8
world_size = 8

def initialize_distributed():
    """Initialize torch.distributed."""
    torch.backends.cudnn.enabled = False
    # Manually set the device ids.
    torch.cuda.set_device(device)
    # Build the TCP init method and call the init process.
    init_method = 'tcp://' + master_addr + ':' + master_port
    torch.distributed.init_process_group(
        backend='nccl',  # or 'gloo'
        world_size=world_size,
        rank=local_rank,
        init_method=init_method)
    mpu.initialize_model_parallel(model_parallel_size)

initialize_distributed()

set_current_pool(4)
set_current_rank(0)
set_random_seed(123)
torch.distributed.barrier(group=mpu.get_model_parallel_group())
tokenizer = OPTTokenizer()

# Stagger checkpoint loading: wait for this rank's turn, then for a free slot
# in the loading pool, so only a few ranks read their large shards at once.
while get_current_rank() != local_rank:
    time.sleep(10)
while get_current_pool() == 0:
    time.sleep(10)
set_current_pool(get_current_pool() - 1)
print("loading rank {}".format(local_rank))
set_current_rank(local_rank + 1)

model = OPTModel.init_from_json('/mnt/models_xingzhaohu/opt-66b-en/config.json')
checkpoint_path = '/mnt/models_xingzhaohu/opt-66b-en/pytorch_model_{:02d}.bin'.format(local_rank)
model.half()
model.eval()
model.to(device)
model.load_weights(checkpoint_path)

print("loading rank {} finished".format(local_rank))
set_current_pool(get_current_pool()+1)
print('current pool setting is {}'.format(get_current_pool()))

torch.distributed.barrier(group=mpu.get_model_parallel_group())
text = """I think The Old Man and the Sea is a very good book, what do you think? I think """

predictor = Predictor(model, tokenizer)
out = predictor.predict_generate_randomsample(text)
if mpu.get_model_parallel_rank() == 0:
    print(f"pred is {out}")

8 changes: 8 additions & 0 deletions flagai/auto_model/auto_loader.py
@@ -72,6 +72,8 @@ def __getattr__(self, name):
"opt-6.7b-en": ["flagai.model.opt_model","OPTModel", "opt"],
"opt-13b-en": ["flagai.model.opt_model","OPTModel", "opt"],
"opt-30b-en": ["flagai.model.opt_model","OPTModel", "opt"],
"opt-66b-en": ["flagai.model.opt_model","OPTModel", "opt"],

}

TOKENIZER_DICT = {
@@ -96,6 +98,8 @@ def __getattr__(self, name):
"opt-6.7b-en": ["flagai.data.tokenizer.opt.opt_en_tokenizer","OPTTokenizer"],
"opt-13b-en": ["flagai.data.tokenizer.opt.opt_en_tokenizer","OPTTokenizer"],
"opt-30b-en": ["flagai.data.tokenizer.opt.opt_en_tokenizer","OPTTokenizer"],
"opt-66b-en": ["flagai.data.tokenizer.opt.opt_en_tokenizer","OPTTokenizer"],

}


@@ -106,6 +110,7 @@ def __init__(self,
model_name: str = "RoBERTa-base-ch",
model_dir: str = "./checkpoints/",
only_download_config: bool = False,
device="cpu",
**kwargs):
"""
Args:
@@ -169,6 +174,7 @@ def __init__(self,
download_path=model_dir,
model_name=model_name_,
only_download_config=only_download_config,
device=device,
**kwargs)

model_id = _get_model_id(model_name)
@@ -178,6 +184,8 @@ def __init__(self,
vocab_file = os.path.join(download_path,'cog-pretrained.model')
if not os.path.exists(vocab_file):
vocab_file = _get_vocab_path(download_path, "cog-pretrain.model", model_id)
elif model_name == "glm-large-en":
vocab_file = "GLM-large-en"
elif model_name == "cpm-large-ch":
# two files to load
vocab_file_1 = os.path.join(download_path, "vocab.json")
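With the new `opt-66b-en` registry entries and the `device` argument, loading might look like the following sketch (the `device` string follows torch conventions; treat this as illustrative rather than the project's documented API):

```python
from flagai.auto_model.auto_loader import AutoLoader

# device is forwarded to from_pretrain, which now moves the model at load time.
loader = AutoLoader(task_name="lm",
                    model_name="opt-66b-en",
                    device="cuda")
model = loader.get_model()
tokenizer = loader.get_tokenizer()
```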
2 changes: 2 additions & 0 deletions flagai/model/base_model.py
@@ -45,6 +45,7 @@ def from_pretrain(cls,
download_path='./checkpoints/',
model_name='RoBERTa-base-ch',
only_download_config=False,
device="cpu",
**kwargs):
model_id = None
try:
@@ -87,6 +88,7 @@ def from_pretrain(cls,
model_id)
if os.path.exists(config_path):
model = cls.init_from_json(config_path, **kwargs)
model.to(device)
if os.getenv('ENV_TYPE') != 'deepspeed+mpu':
if os.path.exists(checkpoint_path):
model.load_weights(checkpoint_path)
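Callers that go through `from_pretrain` directly can pass the same argument; a minimal sketch, assuming the OPT model class shown elsewhere in this commit:

```python
from flagai.model.opt_model import OPTModel

# The new device parameter replaces a separate model.to(device) call.
model = OPTModel.from_pretrain(model_name="opt-30b-en", device="cuda")
```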
2 changes: 2 additions & 0 deletions flagai/model/blocks/gpt2_block.py
@@ -21,6 +21,7 @@ def __init__(self, n_ctx, config, scale=False):
def forward(
self,
hidden_states,
layer_past=None,
attention_mask=None,
head_mask=None,
use_cache=False,
@@ -34,6 +35,7 @@ def forward(

attn_outputs = self.attn(
hidden_states,
layer_past=layer_past,
attention_mask=attention_mask,
head_mask=head_mask,
use_cache=use_cache,
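The `layer_past` plumbing threads cached key/value states from earlier decoding steps into the attention call, so that with `use_cache=True` generation can reuse the cached prefix instead of recomputing attention over it at every step.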
