Enable Ruff C (complexity); autofix
akx committed Jan 31, 2024
1 parent 1a4d546 commit e60b007
Showing 15 changed files with 43 additions and 45 deletions.
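For context on what the autofix changed, below is a minimal sketch of the three rewrite patterns that recur throughout this diff. The variable names are made up for illustration, and the rule codes in the comments are my reading of Ruff's flake8-comprehensions (C4) checks, not something stated in the commit itself.

# Illustrative only: the names below are hypothetical; the rewrite patterns
# mirror the before/after pairs in this commit.
query_tensor = [1, 2, 3]
data = [{"key": 1}, {"key": 2}]
names = ["v_head.summary.weight", "v_head.summary.bias"]

# Identity comprehension -> constructor call (C416-style).
before = [item for item in query_tensor]
after = list(query_tensor)
assert before == after

# dict() over a generator -> dict comprehension (C402-style).
before = dict((key, [d[key] for d in data]) for key in data[0])
after = {key: [d[key] for d in data] for key in data[0]}
assert before == after

# any()/all() over a list comprehension -> generator expression (C419-style),
# which short-circuits without building an intermediate list.
before = any(["bias" in name for name in names])
after = any("bias" in name for name in names)
assert before == after

Note that, per the pyproject.toml change further down, C408 and C901 are deliberately kept in the ignore list.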
2 changes: 1 addition & 1 deletion examples/hello_world.py
@@ -29,7 +29,7 @@
"pad_token_id": tokenizer.eos_token_id,
"max_new_tokens": 20,
}
-response_tensor = ppo_trainer.generate([item for item in query_tensor], return_prompt=False, **generation_kwargs)
+response_tensor = ppo_trainer.generate(list(query_tensor), return_prompt=False, **generation_kwargs)
response_txt = tokenizer.decode(response_tensor[0])

# 5. define a reward for response
(another changed file; name not shown)
@@ -162,7 +162,7 @@ def preprocess_function(examples):


def collator(data):
-return dict((key, [d[key] for d in data]) for key in data[0])
+return {key: [d[key] for d in data] for key in data[0]}


# set seed before initializing value head for deterministic eval
6 changes: 2 additions & 4 deletions examples/research_projects/tools/triviaqa.py
@@ -113,7 +113,7 @@ class ScriptArguments:

def data_generator():
for i in range(len(dataset)):
-yield dataset[i]["question"], [item for item in dataset[i]["answer"]["normalized_aliases"]]
+yield dataset[i]["question"], list(dataset[i]["answer"]["normalized_aliases"])


gen = data_generator()
@@ -187,8 +187,6 @@ def print_trainable_parameters(model):
"answer": [", ".join(item) for item in answers],
}
all_rewards = ppo_trainer.accelerator.gather(torch.tensor(rewards, device=ppo_trainer.accelerator.device))
-ppo_trainer.log_stats(
-    train_stats, texts, [item for item in all_rewards], columns_to_log=["query", "response", "answer"]
-)
+ppo_trainer.log_stats(train_stats, texts, list(all_rewards), columns_to_log=["query", "response", "answer"])
if i % 100 == 0:
ppo_trainer.save_pretrained(f"models/{args.model_name}_{args.seed}_{i}_triviaqa")
(another changed file; name not shown)
@@ -145,7 +145,7 @@ def tokenize(sample):


def collator(data):
-return dict((key, [d[key] for d in data]) for key in data[0])
+return {key: [d[key] for d in data] for key in data[0]}


# set seed before initializing value head for deterministic eval
2 changes: 1 addition & 1 deletion examples/scripts/ppo.py
@@ -94,7 +94,7 @@ def tokenize(sample):


def collator(data):
-return dict((key, [d[key] for d in data]) for key in data[0])
+return {key: [d[key] for d in data] for key in data[0]}


# set seed before initializing value head for deterministic eval
2 changes: 1 addition & 1 deletion examples/scripts/ppo_multi_adapter.py
@@ -96,7 +96,7 @@ def tokenize(example):


def collator(data):
-return dict((key, [d[key] for d in data]) for key in data[0])
+return {key: [d[key] for d in data] for key in data[0]}


config = PPOConfig(
4 changes: 3 additions & 1 deletion pyproject.toml
@@ -2,9 +2,11 @@
target-version = "py37"
ignore = [
"B028", # warning without explicit stacklevel
"C408", # dict() calls (stylistic)
"C901", # function complexity
"E501",
]
extend-select = ["E", "F", "I", "W", "UP", "B", "T"]
extend-select = ["E", "F", "I", "W", "UP", "B", "T", "C"]
line-length = 119

[tool.ruff.per-file-ignores]
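A short note on the two C rules kept in the ignore list above, with a hypothetical snippet (not taken from the repository) showing what C408 would otherwise rewrite, as I read the rule codes:

# C408 flags literal-style constructor calls such as dict(...) with keyword
# arguments; with the ignore in place, code written like this stays as-is:
generation_kwargs = dict(min_length=-1, top_k=0.0, top_p=1.0, do_sample=True)

# Without the ignore, the autofix would turn it into a plain dict literal:
generation_kwargs = {"min_length": -1, "top_k": 0.0, "top_p": 1.0, "do_sample": True}

# C901 is the McCabe complexity check; it has no autofix, so ignoring it keeps
# the newly enabled "C" selection from failing on long functions without
# restructuring them.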
2 changes: 1 addition & 1 deletion scripts/stale.py
@@ -35,7 +35,7 @@ def main():
open_issues = repo.get_issues(state="open")

for issue in open_issues:
-comments = sorted([comment for comment in issue.get_comments()], key=lambda i: i.created_at, reverse=True)
+comments = sorted(issue.get_comments(), key=lambda i: i.created_at, reverse=True)
last_comment = comments[0] if len(comments) > 0 else None
if (
last_comment is not None
2 changes: 1 addition & 1 deletion tests/test_no_peft.py
@@ -140,7 +140,7 @@ def test_ppo_trainer_no_peft(self):
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0), torch.tensor(0.0)]
# train model
-train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

# check gradients are not None
44 changes: 21 additions & 23 deletions tests/test_ppo_trainer.py
@@ -200,7 +200,7 @@ def test_ppo_step(self):
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0), torch.tensor(0.0)]
# train model
-train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

for param in ppo_trainer.model.parameters():
@@ -230,9 +230,7 @@ def test_ppo_step_with_masks(self):
response_mask = [torch.ones_like(r) for r in response_tensor]

# train model
-train_stats = ppo_trainer.step(
-    [q for q in query_tensor], [r for r in response_tensor], reward, response_mask
-)
+train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward, response_mask)
break

for param in ppo_trainer.model.parameters():
@@ -264,7 +262,7 @@ def test_ppo_step_with_no_ref_sgd(self):
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0), torch.tensor(0.0)]
# train model
-train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

for name, param in ppo_trainer.model.named_parameters():
@@ -304,8 +302,8 @@ def test_ppo_step_with_no_ref_sgd_lr_scheduler(self):
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0), torch.tensor(0.0)]
# train model
-_ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
-train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+_ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
+train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

for name, param in ppo_trainer.model.named_parameters():
@@ -341,7 +339,7 @@ def test_ppo_step_with_no_ref(self):
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0), torch.tensor(0.0)]
# train model
-train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

for name, param in ppo_trainer.model.named_parameters():
@@ -392,7 +390,7 @@ def test_ppo_step_with_no_ref_custom_layers(self):
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0), torch.tensor(0.0)]
# train model
-train_stats = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+train_stats = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

pattern = r".*transformer\.h\.(\d+)\..*"
@@ -405,7 +403,7 @@ def test_ppo_step_with_no_ref_custom_layers(self):
self.assertTrue(param.grad is None, f"Parameter {name} has a gradient")
else:
self.assertTrue(param.grad is not None, f"Parameter {name} has no gradient")
-elif any([layer in name for layer in final_layers]):
+elif any(layer in name for layer in final_layers):
self.assertTrue(param.grad is not None, f"Parameter {name} has no gradient")

# ref model should not be trained
@@ -460,11 +458,11 @@ def test_ppo_step_rewards_shape(self):
reward = [torch.tensor([[1.0]]), torch.tensor([[0.0]])]
# train model - this should raise an error
with self.assertRaises(ValueError):
-_ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+_ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)

reward = [torch.tensor([1.0]), torch.tensor([0.0])]
# train model - this should work
-_ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+_ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

# check if the gradients are computed for the model
@@ -499,7 +497,7 @@ def test_ppo_step_input_shape(self):
bs = ppo_trainer.config.batch_size

queries, responses, _, _ = ppo_trainer._step_safety_checker(
-    bs, [q for q in query_tensor], [r for r in response_tensor], reward
+    bs, list(query_tensor), list(response_tensor), reward
)

self.assertTrue(isinstance(queries, list), f"queries should be a list, got {type(queries)}")
@@ -704,7 +702,7 @@ def test_ppo_trainer_max_grad_norm(self):
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0), torch.tensor(0.0)]
# train model
-_ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+_ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

# check gradients
@@ -894,11 +892,11 @@ def make_inputs_require_grad(module, input, output):
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0), torch.tensor(0.0)]
# train model by running a step twice
-_ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+_ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)

ppo_trainer.model.train()
ppo_trainer.model.gradient_checkpointing_enable()
-_ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+_ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

# check gradients
@@ -982,11 +980,11 @@ def make_inputs_require_grad(module, input, output):
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0), torch.tensor(0.0)]
# train model by running a step twice
-_ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+_ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)

ppo_trainer.model.train()
ppo_trainer.model.gradient_checkpointing_enable()
-_ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+_ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

new_logits = ppo_trainer.model.compute_reward_score(dummy_inputs)
@@ -1092,11 +1090,11 @@ def make_inputs_require_grad(module, input, output):
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0), torch.tensor(0.0)]
# train model by running a step twice
-_ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+_ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)

ppo_trainer.model.train()
ppo_trainer.model.gradient_checkpointing_enable()
-_ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+_ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

# check gradients
@@ -1162,7 +1160,7 @@ def test_grad_accumulation(self):
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0), torch.tensor(1.0)]
# train model by running a step twice
-_ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+_ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

model_grad = gpt2_model.v_head.summary.weight
@@ -1186,7 +1184,7 @@ def test_grad_accumulation(self):
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0), torch.tensor(1.0)]
# train model by running a step twice
-_ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+_ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

model_grad_acc = gpt2_model_clone.v_head.summary.weight
@@ -1224,7 +1222,7 @@ def test_push_to_hub_if_best_reward(self):
# (this could be any reward such as human feedback or output from another model)
reward = [torch.tensor(1.0), torch.tensor(0.0)]
# train model
-_ = ppo_trainer.step([q for q in query_tensor], [r for r in response_tensor], reward)
+_ = ppo_trainer.step(list(query_tensor), list(response_tensor), reward)
break

def test_batch_size_check(self):
2 changes: 1 addition & 1 deletion tests/test_reward_trainer.py
@@ -175,7 +175,7 @@ def test_reward_trainer_peft(self):

# check gradients are not None
for n, param in trainer.model.named_parameters():
-if any([t in n for t in trainable_params_name]):
+if any(t in n for t in trainable_params_name):
previous_trainable_params[n] = param.clone()
else:
previous_non_trainable_params[n] = param.clone()
2 changes: 1 addition & 1 deletion trl/core.py
@@ -80,7 +80,7 @@ def stack_dicts(stats_dicts: List[Dict]) -> Dict:

def add_suffix(input_dict: Dict, suffix: str) -> Dict:
"""Add suffix to dict keys."""
-return dict((k + suffix, v) for k, v in input_dict.items())
+return {k + suffix: v for k, v in input_dict.items()}


def pad_to_size(tensor: torch.Tensor, size: int, dim: int = 1, padding: int = 50256) -> torch.Tensor:
6 changes: 3 additions & 3 deletions trl/environment/base_environment.py
@@ -46,7 +46,7 @@ def __call__(self, input_ids, scores, **kwargs):
done = []

for i, decoded_generation in enumerate(decoded_generations):
-sequence_complete = any([stop_string in decoded_generation for stop_string in self.stop_strings])
+sequence_complete = any(stop_string in decoded_generation for stop_string in self.stop_strings)
done.append(sequence_complete)
if not sequence_complete:
self.generated_tokens[i] += 1
@@ -243,7 +243,7 @@ def __init__(
if isinstance(tools, dict):
self.tools = tools
else:
-self.tools = dict([(tool.__class__.__name__, tool) for tool in tools])
+self.tools = {tool.__class__.__name__: tool for tool in tools}
self.reward_fn = reward_fn
self.max_length = max_length
self.request_token = "<request>"
@@ -278,7 +278,7 @@ def run(self, queries, **rewards_kwargs):

histories = [TextHistory(q, qt, system=True) for q, qt in zip(queries, queries_tokens)]

-while any([not history.completed for history in histories]) and turns < self.max_turns:
+while any(not history.completed for history in histories) and turns < self.max_turns:
histories = self.generate(histories)
histories = self.tasks_end_check(histories)
# TODO: make this parallel rather than for-loop
8 changes: 4 additions & 4 deletions trl/models/modeling_base.py
@@ -384,7 +384,7 @@ def _get_checkpoint_from_hub(
# check filename with `v_head` or any known extra module:
files_to_download = set()
for k, v in index["weight_map"].items():
-if any([module in k for module in cls.supported_modules]):
+if any(module in k for module in cls.supported_modules):
files_to_download.add(v)
is_sharded = True

@@ -487,7 +487,7 @@ def add_and_load_reward_modeling_adapter(
adapter_state_dict = loading_func(local_filename, **load_kwargs)

for score_name_candidate in cls.supported_rm_modules:
-if any([score_name_candidate in name for name in adapter_state_dict.keys()]):
+if any(score_name_candidate in name for name in adapter_state_dict.keys()):
score_name = score_name_candidate
# we have found the correct head name and can break
break
@@ -500,7 +500,7 @@ def add_and_load_reward_modeling_adapter(
score_dict[key_name] = param.to(cls._get_current_device())

num_labels, hidden_dim = score_dict["weight"].shape
has_bias = any(["bias" in name for name in adapter_state_dict.keys()])
has_bias = any("bias" in name for name in adapter_state_dict.keys())

score = nn.Linear(hidden_dim, num_labels, bias=has_bias).to(
device=cls._get_current_device(),
@@ -638,7 +638,7 @@ def create_reference_model(
else:
for pattern_candidate in LAYER_PATTERNS:
pattern_candidate = pattern_candidate.format(layer=num_shared_layers)
-if any([pattern_candidate in name for name in parameter_names]):
+if any(pattern_candidate in name for name in parameter_names):
pattern = pattern_candidate
break

2 changes: 1 addition & 1 deletion trl/trainer/ppo_trainer.py
@@ -1337,7 +1337,7 @@ def log_stats(
if self.config.log_with == "wandb":
import wandb

-if any([column_to_log not in batch.keys() for column_to_log in columns_to_log]):
+if any(column_to_log not in batch.keys() for column_to_log in columns_to_log):
raise ValueError(f"Columns to log {columns_to_log} are not present in the batch {batch.keys()}.")

batch_list = [batch[column_to_log] for column_to_log in columns_to_log]
