add recompute interval for pp (#860)
FeixLiu authored Nov 1, 2022
1 parent 5753377 commit 5eceb07
Showing 2 changed files with 8 additions and 2 deletions.
9 changes: 7 additions & 2 deletions ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py
@@ -975,7 +975,8 @@ def __init__(self,
                  recompute_granularity="full",
                  virtual_pp_degree=1,
                  sequence_parallel=False,
-                 no_recompute_layers=None):
+                 no_recompute_layers=None,
+                 pp_recompute_interval=1):
 
         # forward desc
         self.descs = []
@@ -1057,7 +1058,11 @@ def _logits_helper(embedding, output):
 
         recompute_interval = 0
         if recompute and recompute_granularity == "full":
-            recompute_interval = 1
+            assert pp_recompute_interval <= \
+                num_layers // (virtual_pp_degree *
+                               fleet.get_hybrid_communicate_group().topology().get_dim_size("pipe")), \
+                "pp recompute interval should not exceed the number of layers of each pp chunk"
+            recompute_interval = pp_recompute_interval
 
         super().__init__(
             layers=self.descs,
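For readers skimming the hunk above, here is a minimal standalone sketch (not part of the commit) of what the new check enforces: the recompute interval must fit within the number of layers held by each pipeline-parallel chunk. The helper names and the example numbers below are illustrative only.

```python
def layers_per_pp_chunk(num_layers, virtual_pp_degree, pp_degree):
    # Each pipeline chunk holds num_layers // (virtual_pp_degree * pp_degree)
    # transformer layers; the commit asserts pp_recompute_interval <= this value.
    return num_layers // (virtual_pp_degree * pp_degree)


def resolve_recompute_interval(recompute, recompute_granularity,
                               pp_recompute_interval, num_layers,
                               virtual_pp_degree, pp_degree):
    # Mirrors the logic added above: only "full" recompute uses an interval,
    # and the interval may not exceed the layers of a single pp chunk.
    recompute_interval = 0
    if recompute and recompute_granularity == "full":
        chunk_layers = layers_per_pp_chunk(num_layers, virtual_pp_degree, pp_degree)
        assert pp_recompute_interval <= chunk_layers, \
            "pp recompute interval should not exceed the %d layers of each pp chunk" % chunk_layers
        recompute_interval = pp_recompute_interval
    return recompute_interval


# Example: 48 layers, virtual_pp_degree=2, pp_degree=4 -> 6 layers per chunk.
print(resolve_recompute_interval(True, "full", 3, 48, 2, 4))       # 3
print(resolve_recompute_interval(True, "selective", 3, 48, 2, 4))  # 0
```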
1 change: 1 addition & 0 deletions ppfleetx/utils/config.py
@@ -38,6 +38,7 @@ def process_dist_config(configs):
 
     mp_degree = config.setdefault("mp_degree", 1)
    pp_degree = config.setdefault("pp_degree", 1)
+    pp_recompute_interval = config.setdefault("pp_recompute_interval", 1)
 
     # sharding default
     sharding_config = config['sharding']
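A small usage sketch (illustrative, not from the repository) of what the `setdefault` call above does: configs that omit `pp_recompute_interval` fall back to a default of 1, keeping the previous behaviour, while configs that set it pass the value through. The surrounding nesting of the distributed config is omitted here; only the keys shown in the diff are taken from the source.

```python
# Hypothetical distributed-config dict for illustration.
dist_config = {
    "mp_degree": 2,
    "pp_degree": 4,
    "sharding": {},
    # "pp_recompute_interval" deliberately omitted
}

# setdefault both returns the value and writes the default back into the dict,
# so downstream code can rely on the key being present.
interval = dist_config.setdefault("pp_recompute_interval", 1)
print(interval)                              # 1
print(dist_config["pp_recompute_interval"])  # 1
```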
