This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit

fix fmt issue
Signed-off-by: Wang, Yi <yi.a.wang@intel.com>
sywangyi committed Nov 14, 2023
1 parent 9fa5d27 commit 8e1ad14
Showing 5 changed files with 63 additions and 28 deletions.
@@ -5,3 +5,4 @@ datasets
 bitsandbytes
 evaluate
 scikit-learn
+intel-extension-for-transformers
@@ -50,7 +50,7 @@ def is_optimum_habana_available():


 if is_optimum_habana_available():
-    from optimum.habana.accelerate import GaudiAccelerator as Accelerator
+    from optimum.habana.accelerate import GaudiAccelerator as Accelerator  # pylint: disable=E0611, E0401
 else:
     from accelerate import Accelerator

@@ -68,7 +68,7 @@ def is_optimum_habana_available():
 from peft.peft_model import set_peft_model_state_dict

 if is_transformers_greater_than("4.33.0"):
-    from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
+    from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled  # pylint: disable=E0611, E0401
 else:
     from transformers.deepspeed import is_deepspeed_zero3_enabled

@@ -170,7 +170,8 @@ class and the arguments that are specific to trl models. The kwargs

         if reward_adapter is not None and not isinstance(reward_adapter, str):
             raise ValueError(
-                "The `reward_adapter` argument should be a string representing the name of local path or the Hub id to the Reward Modeling adapter."
+                "The `reward_adapter` argument should be a string representing the name of local path or the Hub id to "
+                "the Reward Modeling adapter."
             )

         is_peft_model = False
@@ -670,7 +671,8 @@ def create_reference_model(
     Args:
         model (`PreTrainedModelWrapper`): The model to be copied.
-        num_shared_layers (`int`, *optional*): The number of initial layers that are shared between both models and kept frozen.
+        num_shared_layers (`int`, *optional*): The number of initial layers that are shared between both models and
+            kept frozen.
         pattern (`str`, *optional*): The shared layers are selected with a string pattern
             (e.g. "transformer.h.{layer}" for GPT2) and if a custom pattern is necessary it can be passed here.
@@ -679,7 +681,8 @@
     """
     if is_deepspeed_zero3_enabled():
         raise ValueError(
-            "DeepSpeed ZeRO-3 is enabled and is not compatible with `create_reference_model()`. Please instantiate your reference model directly with `AutoCausalLM.from_pretrained()`."
+            "DeepSpeed ZeRO-3 is enabled and is not compatible with `create_reference_model()`. Please instantiate "
+            "your reference model directly with `AutoCausalLM.from_pretrained()`."
         )

     parameter_names = [n for n, _ in model.named_parameters()]
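For orientation, a minimal usage sketch of the helper touched above, assuming the TRL-style API this module mirrors (the model name and layer count are illustrative):

    from trl import AutoModelForCausalLMWithValueHead, create_reference_model

    model = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
    # Frozen copy that shares the first 6 layers with the active model
    # (selected via a pattern such as "transformer.h.{layer}" for GPT-2).
    ref_model = create_reference_model(model, num_shared_layers=6)

Per the check above, this must happen before DeepSpeed ZeRO-3 is enabled; under ZeRO-3 the reference model has to be instantiated directly instead.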
17 changes: 11 additions & 6 deletions intel_extension_for_transformers/transformers/ppo_config.py
@@ -22,7 +22,7 @@
 from typing import Literal, Optional

 import numpy as np
-import tyro
+import tyro  # pylint: disable=E0611, E0401
 from typing_extensions import Annotated
 from .ppo_core import flatten_dict

@@ -53,7 +53,8 @@ class PPOConfig:
     seed: int = 0
     """Seed value for random generations"""
     log_with: Optional[Literal["wandb", "tensorboard"]] = None
-    """Log with either 'wandb' or 'tensorboard', check https://huggingface.co/docs/accelerate/usage_guides/tracking for more details"""
+    """Log with either 'wandb' or 'tensorboard', check https://huggingface.co/docs/accelerate/usage_guides/tracking
+    for more details"""
     task_name: Optional[str] = None
     """Name of task to use - used only for tracking purposes"""
     model_name: Optional[str] = None
@@ -65,7 +66,8 @@ class PPOConfig:
     remove_unused_columns: bool = True
     """Remove unused columns from the dataset if `datasets.Dataset` is used"""
     tracker_kwargs: JSONDict = field(default_factory=dict)
-    """Keyword arguments for the tracker (e.g. python ppo.py --ppo_config.tracker_kwargs='{"wandb": {"entity": "my_wandb_entity", "name": "my_exp_name"}}'"""
+    """Keyword arguments for the tracker (e.g. python ppo.py --ppo_config.tracker_kwargs='{"wandb": {"entity":
+    "my_wandb_entity", "name": "my_exp_name"}}'"""
     accelerator_kwargs: JSONDict = field(default_factory=dict)
     """Keyword arguments for the accelerator"""
     project_kwargs: JSONDict = field(default_factory=dict)
@@ -85,7 +87,8 @@ class PPOConfig:
     init_kl_coef: Optional[float] = 0.2
     """Initial KL penalty coefficient (used for adaptive and linear control)"""
     kl_penalty: Literal["kl", "abs", "mse", "full"] = "kl"
-    """kl penalty options: 'kl': model_logp - ref_logp, 'abs': abs(kl), 'mse': mean squared error mse(kl) and 'full': the actual kl for all tokens in the distribution"""
+    """kl penalty options: 'kl': model_logp - ref_logp, 'abs': abs(kl), 'mse': mean squared error mse(kl) and 'full':
+    the actual kl for all tokens in the distribution"""
     target: Optional[float] = 6
     """Target KL value for adaptive KL control"""
     horizon: Optional[float] = 10000
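As a reading aid, a hedged sketch of what the first three kl_penalty options compute per token (pure tensor math; logp and ref_logp are illustrative names, the 0.5 factor in the 'mse' branch is an assumption, and 'full' is omitted because it needs the whole token distribution rather than per-token log-probs):

    import torch

    def kl_penalty(logp: torch.Tensor, ref_logp: torch.Tensor, kind: str = "kl") -> torch.Tensor:
        kl = logp - ref_logp
        if kind == "kl":   # model_logp - ref_logp, as documented above
            return kl
        if kind == "abs":  # abs(kl)
            return kl.abs()
        if kind == "mse":  # squared-error variant of the penalty
            return 0.5 * kl.square()
        raise NotImplementedError("'full' requires full distributions, not per-token log-probs")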
@@ -164,7 +167,9 @@ class PPOConfig:
     def __post_init__(self):
         if self.forward_batch_size is not None:
             warnings.warn(
-                "Note that using `forward_batch_size` is deprecated, use `mini_batch_size` instead. By setting it you overwrite `mini_batch_size` which affects both the batch size during forward passes and also the mini batch size for PPO optimization."
+                "Note that using `forward_batch_size` is deprecated, use `mini_batch_size` instead. By setting it you "
+                "overwrite `mini_batch_size` which affects both the batch size during forward passes and also the mini "
+                "batch size for PPO optimization."
             )
             self.mini_batch_size = self.forward_batch_size

@@ -181,7 +186,7 @@ def __post_init__(self):
         self.total_ppo_epochs = int(np.ceil(self.steps / self.batch_size))

         if self.use_habana:
-            from optimum.habana.transformers.modeling_utils import (
+            from optimum.habana.transformers.modeling_utils import (  # pylint: disable=E0611, E0401
                 adapt_transformers_to_gaudi,
             )

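A worked example of the epoch arithmetic in the hunk above (the numbers are illustrative):

    import numpy as np

    steps, batch_size = 10_000, 256
    total_ppo_epochs = int(np.ceil(steps / batch_size))  # ceil(39.06) -> 40 PPO epochs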
2 changes: 1 addition & 1 deletion intel_extension_for_transformers/transformers/ppo_core.py
@@ -258,7 +258,7 @@ def set_seed(seed: int):
         torch.cuda.manual_seed_all(seed)

     if is_optimum_available() and importlib.util.find_spec("optimum.habana") != None:
-        from habana_frameworks.torch.hpu import random as hpu_random
+        from habana_frameworks.torch.hpu import random as hpu_random  # pylint: disable=E0611, E0401

         hpu_random.manual_seed_all(seed)

58 changes: 42 additions & 16 deletions intel_extension_for_transformers/transformers/ppo_trainer.py
@@ -70,6 +70,25 @@
 logger = logging.getLogger(__name__)


+
+@torch.no_grad()
+def get_global_statistics(accelerator, xs: torch.Tensor, mask=None, device="cpu") -> Tuple[float, float, int]:
+    """
+    Computes element-wise mean and variance of the tensor across processes. Reference:
+    https://github.com/OpenLMLab/MOSS-RLHF/blob/40b91eb2f2b71b16919addede0341d2bef70825d/utils.py#L57C1-L73C75
+    """
+    xs = xs.to(accelerator.device)
+    sum_and_count = torch.tensor([xs.sum(), (xs.numel() if mask is None else mask.sum())], device=xs.device)
+    sum_and_count = accelerator.reduce(sum_and_count)
+    global_sum, count = sum_and_count
+    global_mean = global_sum / count
+
+    sum_var = torch.sum(((xs - global_mean) ** 2).mul(1 if mask is None else mask))
+    sum_var = accelerator.reduce(sum_var)
+    global_var = sum_var / count
+
+    return global_mean.to(device), global_var.to(device), count.to(device)
+
 class RunningMoments:
     def __init__(self, accelerator):
         """
@@ -158,7 +177,7 @@ def is_torch_greater_2_0() -> bool:


 if is_deepspeed_available():
-    import deepspeed
+    import deepspeed  # pylint: disable=E0611, E0401

 MODEL_CARD_TEMPLATE = """---
 license: apache-2.0
@@ -286,15 +305,17 @@ def __init__(
             raise ValueError(f"config must be a PPOConfig, got {type(config)}")
         if not isinstance(tokenizer, (PreTrainedTokenizerBase)):
             raise ValueError(
-                f"tokenizer must be a PreTrainedTokenizerBase like a PreTrainedTokenizer or a PreTrainedTokenizerFast, got {type(tokenizer)}"
+                f"tokenizer must be a PreTrainedTokenizerBase like a PreTrainedTokenizer or a PreTrainedTokenizerFast,"
+                f" got {type(tokenizer)}"
             )
         if not isinstance(model, (SUPPORTED_ARCHITECTURES)):
             raise ValueError(
-                f"model must be a PreTrainedModelWrapper, got {type(model)} - supported architectures are: {SUPPORTED_ARCHITECTURES}"
+                f"model must be a PreTrainedModelWrapper, got {type(model)} - supported architectures are: "
+                f"{SUPPORTED_ARCHITECTURES}"
             )
         # Step 1: Initialize Accelerator
         if config.use_habana:
-            from optimum.habana.accelerate import GaudiAccelerator as Accelerator
+            from optimum.habana.accelerate import GaudiAccelerator as Accelerator  # pylint: disable=E0611, E0401
         else:
             from accelerate import Accelerator
         self.accelerator = Accelerator(
@@ -411,7 +432,8 @@ def __init__(

         if not isinstance(self.lr_scheduler, lr_scheduler_class):
             raise ValueError(
-                "lr_scheduler must be a torch.optim.lr_scheduler._LRScheduler or torch.optim.lr_scheduler.LRScheduler (for torch >= 2.0)"
+                "lr_scheduler must be a torch.optim.lr_scheduler._LRScheduler or torch.optim.lr_scheduler."
+                "LRScheduler (for torch >= 2.0)"
             )

         if self.config.adap_kl_ctrl:
@@ -487,8 +509,8 @@ def __init__(
         self.running = RunningMoments(self.accelerator)

         if config.use_habana:
-            import habana_frameworks.torch.core as htcore
-            from habana_frameworks.torch.hpu import wrap_in_hpu_graph
+            import habana_frameworks.torch.core as htcore  # pylint: disable=E0611, E0401
+            from habana_frameworks.torch.hpu import wrap_in_hpu_graph  # pylint: disable=E0611, E0401

             self.htcore = htcore
             model = self.accelerator.unwrap_model(self.model)
@@ -764,7 +786,8 @@ def _step_safety_checker(
                 )
             if batch_size is not None and len(tensor_list) != batch_size:
                 raise ValueError(
-                    f"Batch size ({batch_size}) does not match number of examples - but got {len(tensor_list)} for: {name}"
+                    f"Batch size ({batch_size}) does not match number of examples - but got {len(tensor_list)} for: "
+                    f"{name}"
                 )

         # add queries, scores and responses on the correct device
@@ -1068,8 +1091,8 @@ def step(

     def _early_stop(self, policykl):
         r"""
-        Handles the early stopping logic. If the policy KL is greater than the target KL, then the gradient is zeroed and
-        the optimization step is skipped.
+        Handles the early stopping logic. If the policy KL is greater than the target KL, then the gradient is zeroed
+        and the optimization step is skipped.
         This also handles the multi-gpu case where the policy KL is averaged across all processes.

         Args:
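A hedged sketch of the behaviour the reworded docstring describes (the names below are illustrative, not the trainer's exact attributes):

    def early_stop(policykl: float, target_kl: float, optimizer) -> bool:
        # If the policy KL exceeds the target, discard the accumulated
        # gradient and tell the caller to skip optimizer.step().
        if policykl > target_kl:
            optimizer.zero_grad()
            return True
        return False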
@@ -1485,7 +1508,8 @@ def loss(
         avg_ratio = masked_mean(ratio, mask).item()
         if avg_ratio > self.config.ratio_threshold:
             warnings.warn(
-                f"The average ratio of batch ({avg_ratio:.2f}) exceeds threshold {self.config.ratio_threshold:.2f}. Skipping batch."
+                f"The average ratio of batch ({avg_ratio:.2f}) exceeds threshold {self.config.ratio_threshold:.2f}. "
+                "Skipping batch."
             )
             pg_loss = pg_loss * 0.0
             vf_loss = vf_loss * 0.0
@@ -1553,7 +1577,8 @@ def record_step_stats(self, kl_coef: float, **data):
         if mean_kl.item() < -1.0:
             # warn users
             warnings.warn(
-                f"KL divergence is starting to become negative: {mean_kl.item():.2f} - this might be a precursor for failed training."
+                f"KL divergence is starting to become negative: {mean_kl.item():.2f} - this might be a precursor for"
+                " failed training."
                 " sometimes this happens because the generation kwargs are not correctly set. Please make sure"
                 " that the generation kwargs are set correctly, or review your training hyperparameters."
             )
@@ -1631,7 +1656,7 @@ def log_stats(
         elif self.config.log_with == "wandb":
             if importlib.util.find_spec("wandb") is None:
                 raise ImportError("import wandb error")
-            import wandb
+            import wandb  # pylint: disable=E0611, E0401

             if any(
                 [
@@ -1744,7 +1769,6 @@ def _show_tokens(self, tokens, masks):
         print(text)

     def _prepare_deepspeed(self, model: PreTrainedModelWrapper):
-        # Adapted from accelerate: https://github.com/huggingface/accelerate/blob/739b135f8367becb67ffaada12fe76e3aa60fefd/src/accelerate/accelerator.py#L1473
         deepspeed_plugin = self.accelerator.state.deepspeed_plugin
         config_kwargs = deepspeed_plugin.deepspeed_config
         if model is not None:
@@ -1758,7 +1782,8 @@ def _prepare_deepspeed(self, model: PreTrainedModelWrapper):
                 hidden_size is not None
                 and config_kwargs["zero_optimization"]["stage"] == 3
             ):
-                # Note that `stage3_prefetch_bucket_size` can produce DeepSpeed messages like: `Invalidate trace cache @ step 0: expected module 1, but got module 0`
+                # Note that `stage3_prefetch_bucket_size` can produce DeepSpeed messages like: `Invalidate trace
+                # cache @ step 0: expected module 1, but got module 0`
                 # This is expected and is not an error, see: https://github.com/microsoft/DeepSpeed/discussions/4081
                 config_kwargs.update(
                     {
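The body of this config_kwargs.update(...) call is cut off by the diff view. Purely as an illustration of the kind of ZeRO-3 tuning gated by the hidden_size check above, a hedged sketch (the exact keys and scale factors here are assumptions, not the file's actual values):

    # Illustrative only: such updates typically derive ZeRO-3 buffer sizes
    # from the model's hidden size (keys/values below are assumptions).
    config_kwargs.update(
        {
            "zero_optimization.reduce_bucket_size": hidden_size * hidden_size,
            "zero_optimization.stage3_param_persistence_threshold": 10 * hidden_size,
            "zero_optimization.stage3_prefetch_bucket_size": 0.9 * hidden_size * hidden_size,
        }
    )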
@@ -1773,7 +1798,8 @@ def _prepare_deepspeed(self, model: PreTrainedModelWrapper):
                 )

         # If ZeRO-3 is used, we shard both the active and reference model.
-        # Otherwise, we assume the reference model fits in memory and is initialized on each device with ZeRO disabled (stage 0)
+        # Otherwise, we assume the reference model fits in memory and is initialized on each device with ZeRO disabled
+        # (stage 0)
         if config_kwargs["zero_optimization"]["stage"] != 3:
             config_kwargs["zero_optimization"]["stage"] = 0
         model, *_ = deepspeed.initialize(model=model, config=config_kwargs)
