Adding aggregated logs for training run #411

Open
wants to merge 7 commits into main
14 changes: 2 additions & 12 deletions examples/4_train_policy_with_script.md
@@ -177,7 +177,7 @@ When you start the training process, you will first see your full configuration
After that, you will see a training log like this one:

```
INFO 2024-08-14 13:35:12 ts/train.py:192 step:0 smpl:64 ep:1 epch:0.00 loss:1.112 grdn:15.387 lr:2.0e-07 updt_s:1.738 data_s:4.774
INFO 2024-08-14 13:35:12 ts/train.py:192 smpl:2K ep:3 epch:0.06 loss:3.706 grdn:94.749 lr:1.0e-05 updt_max|avg:1472|1159 data_max|avg:22|10
```

or an evaluation log like:
@@ -186,17 +186,7 @@ or evaluation log like:
INFO 2024-08-14 13:38:45 ts/train.py:226 step:100 smpl:6K ep:52 epch:0.25 ∑rwrd:20.693 success:0.0% eval_s:120.266
```

These logs will also be saved in wandb if `wandb.enable` is set to `true`. Here are the meanings of some abbreviations:

- `smpl`: number of samples seen during training.
- `ep`: number of episodes seen during training. An episode contains multiple samples in a complete manipulation task.
- `epch`: number of times all unique samples have been seen (epoch).
- `grdn`: gradient norm.
- `∑rwrd`: sum of rewards within each evaluation episode, averaged over episodes.
- `success`: average success rate of eval episodes. Reward and success are usually different, except in the sparse-reward setting, where reward=1 only when the task is completed successfully.
- `eval_s`: time to evaluate the policy in the environment, in seconds.
- `updt_s`: time to update the network parameters, in seconds.
- `data_s`: time to load a batch of data, in seconds.
These logs will also be saved in wandb if `wandb.enable` is set to `true`. The meanings of the logging abbreviations are documented in the [training](https://github.com/huggingface/lerobot/blob/429a463aff2f9b55b44055916a41e7b05d107f48/lerobot/scripts/train.py#L177) and [evaluation](https://github.com/huggingface/lerobot/blob/429a463aff2f9b55b44055916a41e7b05d107f48/lerobot/scripts/train.py#L214) code comments.

Some metrics are useful for initial performance profiling. For example, if you find that GPU utilization is low via the `nvidia-smi` command and `data_s` is sometimes too high, you may need to modify the batch size or the number of dataloading workers to accelerate dataloading. We also recommend the [pytorch profiler](https://github.com/huggingface/lerobot?tab=readme-ov-file#improve-your-code-with-profiling) for detailed performance probing.
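
For a quick, self-contained check of dataloading throughput, a sketch along these lines can help (plain PyTorch with a synthetic `TensorDataset`, not lerobot-specific; the worker counts and batch size are only illustrative):

```
# Rough timing of batch loading for a few worker counts (hypothetical setup).
import time

import torch
from torch.utils.data import DataLoader, TensorDataset


def main():
    dataset = TensorDataset(torch.randn(4096, 32), torch.randn(4096, 8))
    for num_workers in (0, 2, 4):
        loader = DataLoader(dataset, batch_size=64, num_workers=num_workers, shuffle=True)
        it = iter(loader)
        start = time.perf_counter()
        for _ in range(20):
            next(it)
        ms_per_batch = (time.perf_counter() - start) / 20 * 1000
        print(f"num_workers={num_workers}: {ms_per_batch:.2f} ms/batch")


if __name__ == "__main__":  # guard needed when num_workers > 0 on spawn-based platforms
    main()
```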

49 changes: 40 additions & 9 deletions lerobot/scripts/train.py
@@ -165,8 +165,10 @@ def log_train_info(logger: Logger, info, step, cfg, dataset, is_online):
loss = info["loss"]
grad_norm = info["grad_norm"]
lr = info["lr"]
update_s = info["update_s"]
dataloading_s = info["dataloading_s"]
avg_data_loading_s = info["avg_data_loading_s"]
max_data_loading_s = info["max_data_loading_s"]
avg_policy_updating_s = info["avg_policy_updating_s"]
max_policy_updating_s = info["max_policy_updating_s"]

# A sample is an (observation, action) pair, where the observation and action
# can span multiple timestamps. In a batch, we have `batch_size` number of samples.
@@ -175,19 +177,22 @@ def log_train_info(logger: Logger, info, step, cfg, dataset, is_online):
num_episodes = num_samples / avg_samples_per_ep
num_epochs = num_samples / dataset.num_samples
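# Example with hypothetical numbers: if num_samples=6400, avg_samples_per_ep=125,
# and dataset.num_samples=25600, then num_episodes=51.2 and num_epochs=0.25.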
log_items = [
f"step:{format_big_number(step)}",
# number of samples seen during training
f"smpl:{format_big_number(num_samples)}",
# number of episodes seen during training
f"ep:{format_big_number(num_episodes)}",
# number of times all unique samples are seen
# number of passes through all of the training samples since the start of training
f"epch:{num_epochs:.2f}",
# loss in the past step
f"loss:{loss:.3f}",
# gradient norm in the past step
f"grdn:{grad_norm:.3f}",
# learning rate at the end of the past step
f"lr:{lr:0.1e}",
# in seconds
f"updt_s:{update_s:.3f}",
f"data_s:{dataloading_s:.3f}", # if not ~0, you are bottlenecked by cpu or io
# time taken for a policy update (forward + backward + optimizer step) in milliseconds. Includes the maximum and average over all training steps since the last log.
f"updt_max|avg:{round(max_policy_updating_s * 1000)}|{round(avg_policy_updating_s * 1000)}",
# data loading time in milliseconds. Includes the maximum and average over all training steps since the last log.
f"data_max|avg:{round(max_data_loading_s *1000)}|{round(avg_data_loading_s*1000)}", # if not ~0, you are bottlenecked by cpu or io
]
logging.info(" ".join(log_items))
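
For reference, a minimal sketch of how the new `max|avg` fields are rendered from a window of per-step timings (hypothetical values, builtins instead of numpy):

```
# Mirrors the aggregation above; timings are in seconds, printed in milliseconds.
policy_updating_times = [1.472, 1.103, 0.987, 1.074]  # one entry per training step since the last log

max_ms = round(max(policy_updating_times) * 1000)
avg_ms = round(sum(policy_updating_times) / len(policy_updating_times) * 1000)
print(f"updt_max|avg:{max_ms}|{avg_ms}")  # -> updt_max|avg:1472|1159
```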

@@ -416,13 +421,17 @@ def evaluate_and_checkpoint_if_needed(step, is_online):

policy.train()
offline_step = 0
data_loading_times = []
policy_updating_times = []

for _ in range(step, cfg.training.offline_steps):
if offline_step == 0:
logging.info("Start offline training on a fixed dataset")

start_time = time.perf_counter()
batch = next(dl_iter)
dataloading_s = time.perf_counter() - start_time
data_loading_times.append(dataloading_s)

for key in batch:
batch[key] = batch[key].to(device, non_blocking=True)
@@ -437,11 +446,20 @@ def evaluate_and_checkpoint_if_needed(step, is_online):
use_amp=cfg.use_amp,
)

train_info["dataloading_s"] = dataloading_s
policy_updating_times.append(train_info["update_s"])

if step % cfg.training.log_freq == 0:
train_info["avg_data_loading_s"] = np.mean(data_loading_times)
train_info["max_data_loading_s"] = np.max(data_loading_times)

train_info["avg_policy_updating_s"] = np.mean(policy_updating_times)
train_info["max_policy_updating_s"] = np.max(policy_updating_times)

log_train_info(logger, train_info, step, cfg, offline_dataset, is_online=False)

data_loading_times = []
policy_updating_times = []

# Note: evaluate_and_checkpoint_if_needed happens **after** the `step`th training update has completed,
# so we pass in step + 1.
evaluate_and_checkpoint_if_needed(step + 1, is_online=False)
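
The pattern used in this loop (collect per-step timings between logs, report max/avg at log time, then reset the window) could be factored into a small helper; the sketch below is only illustrative and not part of the PR:

```
# Hypothetical helper: accumulate per-step timings and summarize them per logging window.
import time

import numpy as np


class WindowedTimer:
    def __init__(self):
        self.samples = []

    def record(self, seconds):
        self.samples.append(seconds)

    def summarize_and_reset(self):
        stats = {"max": float(np.max(self.samples)), "avg": float(np.mean(self.samples))}
        self.samples = []
        return stats


data_timer = WindowedTimer()
for step in range(1, 201):
    start = time.perf_counter()
    time.sleep(0.001)  # stand-in for `batch = next(dl_iter)`
    data_timer.record(time.perf_counter() - start)
    if step % 100 == 0:  # stand-in for `cfg.training.log_freq`
        stats = data_timer.summarize_and_reset()
        print(f"data_max|avg:{round(stats['max'] * 1000)}|{round(stats['avg'] * 1000)}")
```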
@@ -595,6 +613,9 @@ def sample_trajectory_and_update_buffer():
continue

policy.train()
data_loading_times = []
policy_updating_times = []

for _ in range(cfg.training.online_steps_between_rollouts):
with lock:
start_time = time.perf_counter()
@@ -615,16 +636,26 @@ def sample_trajectory_and_update_buffer():
lock=lock,
)

train_info["dataloading_s"] = dataloading_s
data_loading_times.append(dataloading_s)
policy_updating_times.append(train_info["update_s"])
train_info["online_rollout_s"] = online_rollout_s
train_info["update_online_buffer_s"] = update_online_buffer_s
train_info["await_update_online_buffer_s"] = await_update_online_buffer_s
with lock:
train_info["online_buffer_size"] = len(online_dataset)

if step % cfg.training.log_freq == 0:
train_info["avg_data_loading_s"] = np.mean(data_loading_times)
train_info["max_data_loading_s"] = np.max(data_loading_times)

train_info["avg_policy_updating_s"] = np.mean(policy_updating_times)
train_info["max_policy_updating_s"] = np.max(policy_updating_times)

log_train_info(logger, train_info, step, cfg, online_dataset, is_online=True)

data_loading_times = []
policy_updating_times = []

# Note: evaluate_and_checkpoint_if_needed happens **after** the `step`th training update has completed,
# so we pass in step + 1.
evaluate_and_checkpoint_if_needed(step + 1, is_online=True)
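
The online loop emits the same aggregated fields as the offline loop. For downstream analysis of saved console logs, a small hypothetical parser like the one below (the regex and the sample line are illustrative) recovers the max/avg values:

```
# Hypothetical helper: pull the `*_max|avg` fields out of a console log line.
import re

line = (
    "INFO 2024-08-14 13:35:12 ts/train.py:192 smpl:2K ep:3 epch:0.06 loss:3.706 "
    "grdn:94.749 lr:1.0e-05 updt_max|avg:1472|1159 data_max|avg:22|10"
)

fields = {
    key: {"max_ms": int(max_ms), "avg_ms": int(avg_ms)}
    for key, max_ms, avg_ms in re.findall(r"(\w+)_max\|avg:(\d+)\|(\d+)", line)
}
print(fields)  # {'updt': {'max_ms': 1472, 'avg_ms': 1159}, 'data': {'max_ms': 22, 'avg_ms': 10}}
```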