Commit

PaddleAPEX monitor Llama-13b
CannotBeFatAnyMore committed May 28, 2024
1 parent c1cfe63 commit d20e00c
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions paddlenlp/transformers/llama/modeling.py
@@ -1353,7 +1353,13 @@ class LlamaModel(LlamaPretrainedModel):
"""

def __init__(self, config: LlamaConfig):
#####################################
from paddlenlp.PaddleAPEX import Acc
self.checker = Acc()
#####################################
super().__init__(config)


self.vocab_size = config.vocab_size
self.hidden_size = config.hidden_size
self.sequence_parallel = config.sequence_parallel
@@ -1471,6 +1477,10 @@ def forward(
return_dict=False,
**kwargs,
):
#####################################
self.checker.start()
#####################################

if self.sequence_parallel and use_cache:
raise ValueError("We currently only support sequence parallel without cache.")

@@ -1615,6 +1625,11 @@ def forward(

next_cache = next_decoder_cache if use_cache else None


#####################################
self.checker.stop()
#####################################

if not return_dict:
return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
return BaseModelOutputWithPastAndCrossAttentions(

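For reference, the hook pattern this commit adds to LlamaModel can be sketched on a generic Paddle layer. The sketch below is illustrative only: it assumes nothing beyond the Acc API visible in the diff (the Acc() constructor, start(), and stop()); the MonitoredMLP layer, its sizes, and the usage lines are hypothetical and not part of the commit.

# Minimal sketch (not part of this commit): same instrumentation pattern on a generic layer.
# Only Acc(), start(), and stop() are taken from the diff above; everything else is hypothetical.
import paddle
from paddlenlp.PaddleAPEX import Acc


class MonitoredMLP(paddle.nn.Layer):
    def __init__(self, hidden_size: int = 64):
        super().__init__()
        # One checker per layer instance, analogous to LlamaModel.__init__ in the diff.
        self.checker = Acc()
        self.linear = paddle.nn.Linear(hidden_size, hidden_size)

    def forward(self, x):
        # Start monitoring at the top of forward ...
        self.checker.start()
        out = self.linear(x)
        # ... and stop just before returning, mirroring the placement in LlamaModel.forward.
        self.checker.stop()
        return out


# Hypothetical usage: PaddleAPEX monitors the work done between start() and stop().
# mlp = MonitoredMLP()
# y = mlp(paddle.randn([2, 64]))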