Adding aggregated logs for training run #411

Open
wants to merge 7 commits into main
14 changes: 2 additions & 12 deletions examples/4_train_policy_with_script.md
@@ -177,7 +177,7 @@ When you start the training process, you will first see your full configuration
After that, you will see a training log like this one:

```
INFO 2024-08-14 13:35:12 ts/train.py:192 step:0 smpl:64 ep:1 epch:0.00 loss:1.112 grdn:15.387 lr:2.0e-07 updt_s:1.738 data_s:4.774
INFO 2024-08-14 13:35:12 ts/train.py:192 smpl:2K ep:3 epch:0.06 loss:3.706 grdn:94.749 lr:1.0e-05 updt_max|avg:1472|1159 data_max|avg:22|10
```

or an evaluation log like:
@@ -186,17 +186,7 @@ or evaluation log like:
INFO 2024-08-14 13:38:45 ts/train.py:226 step:100 smpl:6K ep:52 epch:0.25 ∑rwrd:20.693 success:0.0% eval_s:120.266
```

These logs will also be saved in wandb if `wandb.enable` is set to `true`. Here are the meanings of some abbreviations:

- `smpl`: number of samples seen during training.
- `ep`: number of episodes seen during training. An episode contains multiple samples in a complete manipulation task.
- `epch`: number of times all unique samples have been seen (epoch).
- `grdn`: gradient norm.
- `∑rwrd`: sum of rewards within each evaluation episode, averaged over episodes.
- `success`: average success rate of eval episodes. Reward and success are usually different, except in the sparse-reward setting, where reward=1 only when the task is completed successfully.
- `eval_s`: time to evaluate the policy in the environment, in seconds.
- `updt_s`: time to update the network parameters, in seconds.
- `data_s`: time to load a batch of data, in seconds.
These logs will also be saved in wandb if `wandb.enable` is set to `true`. The meanings of the logging abbreviations are documented in the [training](https://github.com/huggingface/lerobot/blob/429a463aff2f9b55b44055916a41e7b05d107f48/lerobot/scripts/train.py#L177) and [evaluation](https://github.com/huggingface/lerobot/blob/429a463aff2f9b55b44055916a41e7b05d107f48/lerobot/scripts/train.py#L214) code comments.

Some metrics are useful for initial performance profiling. For example, if you find that GPU utilization is low via the `nvidia-smi` command and `data_s` is sometimes too high, you may need to modify the batch size or the number of dataloading workers to accelerate dataloading. We also recommend the [pytorch profiler](https://github.com/huggingface/lerobot?tab=readme-ov-file#improve-your-code-with-profiling) for detailed performance probing.
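
For a quick, self-contained check of dataloading throughput, a sketch along these lines can help (plain PyTorch with a synthetic `TensorDataset`, not lerobot-specific; the worker counts and batch size are only illustrative):

```
# Rough timing of batch loading for a few worker counts (hypothetical setup).
import time

import torch
from torch.utils.data import DataLoader, TensorDataset


def main():
    dataset = TensorDataset(torch.randn(4096, 32), torch.randn(4096, 8))
    for num_workers in (0, 2, 4):
        loader = DataLoader(dataset, batch_size=64, num_workers=num_workers, shuffle=True)
        it = iter(loader)
        start = time.perf_counter()
        for _ in range(20):
            next(it)
        ms_per_batch = (time.perf_counter() - start) / 20 * 1000
        print(f"num_workers={num_workers}: {ms_per_batch:.2f} ms/batch")


if __name__ == "__main__":  # guard needed when num_workers > 0 on spawn-based platforms
    main()
```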

49 changes: 40 additions & 9 deletions lerobot/scripts/train.py
@@ -165,8 +165,10 @@ def log_train_info(logger: Logger, info, step, cfg, dataset, is_online):
loss = info["loss"]
grad_norm = info["grad_norm"]
lr = info["lr"]
update_s = info["update_s"]
dataloading_s = info["dataloading_s"]
avg_data_loading_s = info["avg_data_loading_s"]
max_data_loading_s = info["max_data_loading_s"]
avg_policy_updating_s = info["avg_policy_updating_s"]
max_policy_updating_s = info["max_policy_updating_s"]

# A sample is an (observation, action) pair, where the observation and action
# can span multiple timestamps. In a batch, we have `batch_size` number of samples.
@@ -175,19 +177,22 @@ def log_train_info(logger: Logger, info, step, cfg, dataset, is_online):
num_episodes = num_samples / avg_samples_per_ep
num_epochs = num_samples / dataset.num_samples
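# Example with hypothetical numbers: if num_samples=6400, avg_samples_per_ep=125,
# and dataset.num_samples=25600, then num_episodes=51.2 and num_epochs=0.25.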
log_items = [
f"step:{format_big_number(step)}",
# number of samples seen during training
f"smpl:{format_big_number(num_samples)}",
# number of episodes seen during training
f"ep:{format_big_number(num_episodes)}",
# number of times all unique samples are seen
# number of passes through all of the training samples since the start of training
f"epch:{num_epochs:.2f}",
# loss in the past step
f"loss:{loss:.3f}",
# gradient norm in the past step
f"grdn:{grad_norm:.3f}",
# learning rate at the end of the past step
f"lr:{lr:0.1e}",
# in seconds
f"updt_s:{update_s:.3f}",
f"data_s:{dataloading_s:.3f}", # if not ~0, you are bottlenecked by cpu or io
# time taken for a policy update (forward + backward + optimizer step) in milliseconds. Includes the maximum and average over all training steps since the last log.
f"updt_max|avg:{round(max_policy_updating_s * 1000)}|{round(avg_policy_updating_s * 1000)}",
# data loading time in milliseconds. Includes the maximum and average over all training steps since the last log.
f"data_max|avg:{round(max_data_loading_s *1000)}|{round(avg_data_loading_s*1000)}", # if not ~0, you are bottlenecked by cpu or io
]
logging.info(" ".join(log_items))
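
For reference, a minimal sketch of how the new `max|avg` fields are rendered from a window of per-step timings (hypothetical values, builtins instead of numpy):

```
# Mirrors the aggregation above; timings are in seconds, printed in milliseconds.
policy_updating_times = [1.472, 1.103, 0.987, 1.074]  # one entry per training step since the last log

max_ms = round(max(policy_updating_times) * 1000)
avg_ms = round(sum(policy_updating_times) / len(policy_updating_times) * 1000)
print(f"updt_max|avg:{max_ms}|{avg_ms}")  # -> updt_max|avg:1472|1159
```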

@@ -416,13 +421,17 @@ def evaluate_and_checkpoint_if_needed(step, is_online):

policy.train()
offline_step = 0
data_loading_times = []
policy_updating_times = []

for _ in range(step, cfg.training.offline_steps):
if offline_step == 0:
logging.info("Start offline training on a fixed dataset")

start_time = time.perf_counter()
batch = next(dl_iter)
dataloading_s = time.perf_counter() - start_time
data_loading_times.append(dataloading_s)

for key in batch:
batch[key] = batch[key].to(device, non_blocking=True)
@@ -437,11 +446,20 @@ def evaluate_and_checkpoint_if_needed(step, is_online):
use_amp=cfg.use_amp,
)

train_info["dataloading_s"] = dataloading_s
policy_updating_times.append(train_info["update_s"])

if step % cfg.training.log_freq == 0:
train_info["avg_data_loading_s"] = np.mean(data_loading_times)
train_info["max_data_loading_s"] = np.max(data_loading_times)

train_info["avg_policy_updating_s"] = np.mean(policy_updating_times)
train_info["max_policy_updating_s"] = np.max(policy_updating_times)

log_train_info(logger, train_info, step, cfg, offline_dataset, is_online=False)

data_loading_times = []
policy_updating_times = []

# Note: evaluate_and_checkpoint_if_needed happens **after** the `step`th training update has completed,
# so we pass in step + 1.
evaluate_and_checkpoint_if_needed(step + 1, is_online=False)
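
The pattern used in this loop (collect per-step timings between logs, report max/avg at log time, then reset the window) could be factored into a small helper; the sketch below is only illustrative and not part of the PR:

```
# Hypothetical helper: accumulate per-step timings and summarize them per logging window.
import time

import numpy as np


class WindowedTimer:
    def __init__(self):
        self.samples = []

    def record(self, seconds):
        self.samples.append(seconds)

    def summarize_and_reset(self):
        stats = {"max": float(np.max(self.samples)), "avg": float(np.mean(self.samples))}
        self.samples = []
        return stats


data_timer = WindowedTimer()
for step in range(1, 201):
    start = time.perf_counter()
    time.sleep(0.001)  # stand-in for `batch = next(dl_iter)`
    data_timer.record(time.perf_counter() - start)
    if step % 100 == 0:  # stand-in for `cfg.training.log_freq`
        stats = data_timer.summarize_and_reset()
        print(f"data_max|avg:{round(stats['max'] * 1000)}|{round(stats['avg'] * 1000)}")
```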
@@ -595,6 +613,9 @@ def sample_trajectory_and_update_buffer():
continue

policy.train()
data_loading_times = []
policy_updating_times = []

for _ in range(cfg.training.online_steps_between_rollouts):
with lock:
start_time = time.perf_counter()
@@ -615,16 +636,26 @@ def sample_trajectory_and_update_buffer():
lock=lock,
)

train_info["dataloading_s"] = dataloading_s
data_loading_times.append(dataloading_s)
policy_updating_times.append(train_info["update_s"])
train_info["online_rollout_s"] = online_rollout_s
train_info["update_online_buffer_s"] = update_online_buffer_s
train_info["await_update_online_buffer_s"] = await_update_online_buffer_s
with lock:
train_info["online_buffer_size"] = len(online_dataset)

if step % cfg.training.log_freq == 0:
train_info["avg_data_loading_s"] = np.mean(data_loading_times)
train_info["max_data_loading_s"] = np.max(data_loading_times)

train_info["avg_policy_updating_s"] = np.mean(policy_updating_times)
train_info["max_policy_updating_s"] = np.max(policy_updating_times)

log_train_info(logger, train_info, step, cfg, online_dataset, is_online=True)

data_loading_times = []
policy_updating_times = []

# Note: evaluate_and_checkpoint_if_needed happens **after** the `step`th training update has completed,
# so we pass in step + 1.
evaluate_and_checkpoint_if_needed(step + 1, is_online=True)
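
The online loop emits the same aggregated fields as the offline loop. For downstream analysis of saved console logs, a small hypothetical parser like the one below (the regex and the sample line are illustrative) recovers the max/avg values:

```
# Hypothetical helper: pull the `*_max|avg` fields out of a console log line.
import re

line = (
    "INFO 2024-08-14 13:35:12 ts/train.py:192 smpl:2K ep:3 epch:0.06 loss:3.706 "
    "grdn:94.749 lr:1.0e-05 updt_max|avg:1472|1159 data_max|avg:22|10"
)

fields = {
    key: {"max_ms": int(max_ms), "avg_ms": int(avg_ms)}
    for key, max_ms, avg_ms in re.findall(r"(\w+)_max\|avg:(\d+)\|(\d+)", line)
}
print(fields)  # {'updt': {'max_ms': 1472, 'avg_ms': 1159}, 'data': {'max_ms': 22, 'avg_ms': 10}}
```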