From 5b7b8219b65e6b53b9e6f0ad3de3fe552dfcff20 Mon Sep 17 00:00:00 2001
From: Kunbo Ding
Date: Mon, 29 Jan 2024 22:07:36 +0800
Subject: [PATCH] [Cherry-pick] logger level

Cherry-pick of #7903

---
 paddlenlp/peft/lora/lora_model.py     |  2 +-
 paddlenlp/peft/prefix/prefix_model.py |  2 +-
 paddlenlp/trainer/integrations.py     |  2 +-
 paddlenlp/trainer/trainer.py          | 18 +++++++++---------
 paddlenlp/trainer/trainer_callback.py |  2 +-
 paddlenlp/trainer/training_args.py    | 12 ++++++------
 paddlenlp/transformers/model_utils.py | 26 +++++++++++++++++---------
 7 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/paddlenlp/peft/lora/lora_model.py b/paddlenlp/peft/lora/lora_model.py
index 5fa982c4d586..ccea3c006a0f 100644
--- a/paddlenlp/peft/lora/lora_model.py
+++ b/paddlenlp/peft/lora/lora_model.py
@@ -489,7 +489,7 @@ def print_trainable_parameters(self) -> None:
                 freeze_numel += np.prod(weight.shape)
             else:
                 trainable_numel += np.prod(weight.shape)
-        logger.info(
+        logger.debug(
             f"Frozen parameters: {freeze_numel:.2e} || Trainable parameters:{trainable_numel:.2e} || Total parameters:{freeze_numel+trainable_numel:.2e}|| Trainable:{trainable_numel / (freeze_numel+trainable_numel):.2%}"
         )
 
diff --git a/paddlenlp/peft/prefix/prefix_model.py b/paddlenlp/peft/prefix/prefix_model.py
index 6ca7865b373f..1ccf1f0bc594 100644
--- a/paddlenlp/peft/prefix/prefix_model.py
+++ b/paddlenlp/peft/prefix/prefix_model.py
@@ -282,7 +282,7 @@ def print_trainable_parameters(self) -> None:
                 freeze_numel += np.prod(weight.shape)
             else:
                 trainable_numel += np.prod(weight.shape)
-        logger.info(
+        logger.debug(
             f"Frozen parameters: {freeze_numel:.2e} || Trainable parameters:{trainable_numel:.2e} || Total parameters:{freeze_numel+trainable_numel:.2e}|| Trainable:{trainable_numel / (freeze_numel+trainable_numel):.2%}"
         )
 
diff --git a/paddlenlp/trainer/integrations.py b/paddlenlp/trainer/integrations.py
index 60154e3b2a3f..f294247339f3 100644
--- a/paddlenlp/trainer/integrations.py
+++ b/paddlenlp/trainer/integrations.py
@@ -96,7 +96,7 @@ def on_train_begin(self, args, state, control, **kwargs):
         if self.vdl_writer is not None:
             self.vdl_writer.add_text("args", args.to_json_string())
 
-        if "model" in kwargs:
+        if "model" in kwargs and logger.logger.level < 20:
             model = kwargs["model"]
             if isinstance(model, LoRAModel) or isinstance(model, PrefixModelForCausalLM):
                 model = kwargs["model"].model
diff --git a/paddlenlp/trainer/trainer.py b/paddlenlp/trainer/trainer.py
index 70ce9033842b..b661ce172223 100644
--- a/paddlenlp/trainer/trainer.py
+++ b/paddlenlp/trainer/trainer.py
@@ -729,7 +729,7 @@ def train(
         # per_device_trainable_numel = sum(p.numel().item() for p in model.parameters() if not p.stop_gradient)
         # TODO: Temporary fix since Tensor.numel() not supported in distributed mode
         per_device_trainable_numel = sum(np.prod(p.shape) for p in model.parameters() if not p.stop_gradient)
-        logger.info(f" Number of trainable parameters = {per_device_trainable_numel:,} (per device)")
+        logger.debug(f" Number of trainable parameters = {per_device_trainable_numel:,} (per device)")
         if self.args.use_hybrid_parallel:
             # todo fix for pipeline_parallel_degree
             parts_num = max(self.args.tensor_parallel_degree, 1) * max(self.args.pipeline_parallel_degree, 1)
@@ -745,7 +745,7 @@
                     trainable_numel = trainable_numel // self.args.sep_parallel_degree
                 # the numel is roughly, because the tensor parallel still hold own bias or layer_norm weight without splited
                 # so, the trainable numel is a little bigger than real.
-                logger.info(f" Number of trainable parameters = {trainable_numel:,} (all devices, roughly)")
+                logger.debug(f" Number of trainable parameters = {trainable_numel:,} (all devices, roughly)")
 
         start_time = time.time()
         self._globalstep_last_start_time = time.time()
@@ -2392,7 +2392,7 @@ def log(self, logs: Dict[str, float], **kwargs) -> None:
             kwargs.update(timer=self.timers, paddle_pipeline_timers=paddle_pipeline_timers)
 
         if self.state.epoch is not None:
-            logs["epoch"] = round(self.state.epoch, 4)
+            logs["progress_or_epoch"] = round(self.state.epoch, 4)
         output = {**logs, **{"step": self.state.global_step}}
         self.state.log_history.append(output)
         self.control = self.callback_handler.on_log(self.args, self.state, self.control, logs, **kwargs)
@@ -2953,23 +2953,23 @@ def print_config(self, args=None, key=""):
         """
         print config values
         """
-        logger.info("=" * 60)
+        logger.debug("=" * 60)
         if args is None:
             args = self.args
             key = "Training"
 
         import paddlenlp
 
-        logger.info("{:^40}".format("{} Configuration Arguments".format(key)))
-        logger.info("{:30}: {}".format("paddle commit id", paddle.version.commit))
-        logger.info("{:30}: {}".format("paddlenlp commit id", paddlenlp.version.commit))
+        logger.debug("{:^40}".format("{} Configuration Arguments".format(key)))
+        logger.debug("{:30}: {}".format("paddle commit id", paddle.version.commit))
+        logger.debug("{:30}: {}".format("paddlenlp commit id", paddlenlp.version.commit))
 
         for a in dir(args):
             if a[:2] != "__":  # don't print double underscore methods
                 v = getattr(args, a)
                 if not isinstance(v, types.MethodType):
-                    logger.info("{:30}: {}".format(a, v))
+                    logger.debug("{:30}: {}".format(a, v))
 
-        logger.info("")
+        logger.debug("")
 
     def is_unified_checkpoint(self, resume_from_checkpoint, safe_serialization=True):
         is_unified_checkpoint_type = False
diff --git a/paddlenlp/trainer/trainer_callback.py b/paddlenlp/trainer/trainer_callback.py
index 834ba5912e56..b263c7930daf 100644
--- a/paddlenlp/trainer/trainer_callback.py
+++ b/paddlenlp/trainer/trainer_callback.py
@@ -515,7 +515,7 @@ def on_log(self, args, state, control, logs=None, **kwargs):
                 logs_str = ", ".join(f"{k}: {v}" for k, v in logs.items())
             else:
                 logs_str = str(logs)
-            self.training_bar.write(logs_str)
+            logger.info(logs_str)
 
     def on_train_end(self, args, state, control, **kwargs):
         if state.is_local_process_zero:
diff --git a/paddlenlp/trainer/training_args.py b/paddlenlp/trainer/training_args.py
index d6548a034d20..e73447734586 100644
--- a/paddlenlp/trainer/training_args.py
+++ b/paddlenlp/trainer/training_args.py
@@ -1703,21 +1703,21 @@ def print_config(self, args=None, key=""):
         """
         print all config values.
         """
-        logger.info("=" * 60)
+        logger.debug("=" * 60)
         if args is None:
             args = self
             key = "Training"
 
         import paddlenlp
 
-        logger.info("{:^40}".format("{} Configuration Arguments".format(key)))
-        logger.info("{:30}: {}".format("paddle commit id", paddle.version.commit))
-        logger.info("{:30}: {}".format("paddlenlp commit id", paddlenlp.version.commit))
+        logger.debug("{:^40}".format("{} Configuration Arguments".format(key)))
+        logger.debug("{:30}: {}".format("paddle commit id", paddle.version.commit))
+        logger.debug("{:30}: {}".format("paddlenlp commit id", paddlenlp.version.commit))
 
         for a in dir(args):
             if a[:2] != "__":  # don't print double underscore methods
                 v = getattr(args, a)
                 if not isinstance(v, types.MethodType):
-                    logger.info("{:30}: {}".format(a, v))
+                    logger.debug("{:30}: {}".format(a, v))
 
-        logger.info("")
+        logger.debug("")
diff --git a/paddlenlp/transformers/model_utils.py b/paddlenlp/transformers/model_utils.py
index ac8450d4fdcc..e252dfa5c788 100644
--- a/paddlenlp/transformers/model_utils.py
+++ b/paddlenlp/transformers/model_utils.py
@@ -1929,15 +1929,23 @@ def _find_mismatched_keys(
             raise RuntimeError(f"Error(s) in loading state_dict for {model.__class__.__name__}:\n\t{error_msg}")
 
         if len(unexpected_keys) > 0:
-            logger.warning(
-                f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when"
-                f" initializing {model.__class__.__name__}: {sorted(unexpected_keys)}\n- This IS expected if you are"
-                f" initializing {model.__class__.__name__} from the checkpoint of a model trained on another task or"
-                " with another architecture (e.g. initializing a BertForSequenceClassification model from a"
-                " BertForPreTraining model).\n- This IS NOT expected if you are initializing"
-                f" {model.__class__.__name__} from the checkpoint of a model that you expect to be exactly identical"
-                " (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)."
-            )
+            if logger.logger.level < 20:
+                logger.warning(
+                    f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when"
+                    f" initializing {model.__class__.__name__}: {sorted(unexpected_keys)}\n- This IS expected if you are"
+                    f" initializing {model.__class__.__name__} from the checkpoint of a model trained on another task or"
+                    " with another architecture (e.g. initializing a BertForSequenceClassification model from a"
+                    " BertForPreTraining model).\n- This IS NOT expected if you are initializing"
+                    f" {model.__class__.__name__} from the checkpoint of a model that you expect to be exactly identical"
+                    " (initializing a BertForSequenceClassification model from a BertForSequenceClassification model)."
+                )
+            else:
+                logger.warning(
+                    f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when"
+                    f" initializing the model. This IS expected if you are"
+                    f" initializing the model from a checkpoint of a model trained on another task or"
+                    " with another architecture."
+                )
         else:
             logger.info(f"All model checkpoint weights were used when initializing {model.__class__.__name__}.\n")
 