Working monkey patch

linkedin · Sep 3, 2024 · 9e0857e · 9e0857e
1 parent bed07d5
commit 9e0857e
Show file tree

Hide file tree

Showing 7 changed files with 131 additions and 98 deletions.
diff --git a/examples/huggingface/llama_B_48_no_patching.log b/examples/huggingface/llama_B_48_no_patching.log
diff --git a/examples/huggingface/llama_B_48_post_init_class_patching.log b/examples/huggingface/llama_B_48_post_init_class_patching.log
diff --git a/examples/huggingface/llama_B_48_post_init_instance_patching.log b/examples/huggingface/llama_B_48_post_init_instance_patching.log
@@ -1,16 +1,21 @@
-***** Pre-init original model *****
-***** Pre-init original model *****
-***** Pre-init original model *****
-***** Pre-init original model *****
-***** Post-init original model *****
-***** Pre-Apply Liger Kernel *****
-***** Post-init original model *****
-***** Pre-Apply Liger Kernel *****
-***** Post-init original model *****
-***** Pre-Apply Liger Kernel *****
-***** Post-init original model *****
-***** Pre-Apply Liger Kernel *****
-***** Post-Apply Liger Kernel *****
-***** Post-Apply Liger Kernel *****
-***** Post-Apply Liger Kernel *****
-***** Post-Apply Liger Kernel *****
+{'loss': 13.3485, 'grad_norm': 535.4854736328125, 'learning_rate': 3e-06, 'epoch': 0.0, 'num_input_tokens_seen': 75264}
+{'loss': 13.3631, 'grad_norm': 530.2271118164062, 'learning_rate': 6e-06, 'epoch': 0.01, 'num_input_tokens_seen': 132864, 'step': 2, 'step_time_sec': 4.92, 'avg_step_time_sec': 4.92, 'time_to_completion_sec': 88.62, 'estimated_total_time_sec': 98.47, 'step_peak_memory_allocated_MB': 38307.89, 'step_peak_memory_reserved_MB': 54898.0, 'total_peak_memory_allocated_MB': 38307.89, 'total_peak_memory_reserved_MB': 54898.0, 'step_tokens_per_second': 11698.89, 'avg_tokens_per_second': 11698.89}
+{'loss': 10.8637, 'grad_norm': 380.6391296386719, 'learning_rate': 5.954423259036625e-06, 'epoch': 0.01, 'num_input_tokens_seen': 185088, 'step': 3, 'step_time_sec': 4.3, 'avg_step_time_sec': 4.61, 'time_to_completion_sec': 78.44, 'estimated_total_time_sec': 92.28, 'step_peak_memory_allocated_MB': 38307.92, 'step_peak_memory_reserved_MB': 55360.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 55360.0, 'step_tokens_per_second': 12131.77, 'avg_tokens_per_second': 11900.82}
+{'loss': 8.3786, 'grad_norm': 47.627689361572266, 'learning_rate': 5.819077862357725e-06, 'epoch': 0.02, 'num_input_tokens_seen': 245760, 'step': 4, 'step_time_sec': 5.63, 'avg_step_time_sec': 4.95, 'time_to_completion_sec': 79.25, 'estimated_total_time_sec': 99.07, 'step_peak_memory_allocated_MB': 38307.87, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 10772.7, 'avg_tokens_per_second': 11473.26}
+{'loss': 8.411, 'grad_norm': 95.4661636352539, 'learning_rate': 5.598076211353317e-06, 'epoch': 0.02, 'num_input_tokens_seen': 301056, 'step': 5, 'step_time_sec': 4.32, 'avg_step_time_sec': 4.79, 'time_to_completion_sec': 71.92, 'estimated_total_time_sec': 95.89, 'step_peak_memory_allocated_MB': 38307.92, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 12808.08, 'avg_tokens_per_second': 11773.76}
+{'loss': 8.7209, 'grad_norm': 73.17122650146484, 'learning_rate': 5.298133329356934e-06, 'epoch': 0.02, 'num_input_tokens_seen': 354816, 'step': 6, 'step_time_sec': 4.11, 'avg_step_time_sec': 4.66, 'time_to_completion_sec': 65.2, 'estimated_total_time_sec': 93.14, 'step_peak_memory_allocated_MB': 38307.85, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 13087.07, 'avg_tokens_per_second': 12005.45}
+{'loss': 8.2012, 'grad_norm': 35.51559066772461, 'learning_rate': 4.928362829059618e-06, 'epoch': 0.03, 'num_input_tokens_seen': 414720, 'step': 7, 'step_time_sec': 4.18, 'avg_step_time_sec': 4.58, 'time_to_completion_sec': 59.5, 'estimated_total_time_sec': 91.54, 'step_peak_memory_allocated_MB': 38307.91, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 14338.07, 'avg_tokens_per_second': 12360.3}
+{'loss': 7.9819, 'grad_norm': 35.38005828857422, 'learning_rate': 4.5e-06, 'epoch': 0.03, 'num_input_tokens_seen': 466944, 'step': 8, 'step_time_sec': 3.58, 'avg_step_time_sec': 4.43, 'time_to_completion_sec': 53.22, 'estimated_total_time_sec': 88.7, 'step_peak_memory_allocated_MB': 38307.84, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 14582.17, 'avg_tokens_per_second': 12616.62}
+{'loss': 7.8575, 'grad_norm': 9.446126937866211, 'learning_rate': 4.0260604299770066e-06, 'epoch': 0.04, 'num_input_tokens_seen': 514560, 'step': 9, 'step_time_sec': 3.47, 'avg_step_time_sec': 4.31, 'time_to_completion_sec': 47.46, 'estimated_total_time_sec': 86.29, 'step_peak_memory_allocated_MB': 38307.84, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 13723.12, 'avg_tokens_per_second': 12727.86}
+{'loss': 7.7469, 'grad_norm': 7.220946788787842, 'learning_rate': 3.5209445330007917e-06, 'epoch': 0.04, 'num_input_tokens_seen': 572928, 'step': 10, 'step_time_sec': 5.99, 'avg_step_time_sec': 4.5, 'time_to_completion_sec': 45.0, 'estimated_total_time_sec': 90.01, 'step_peak_memory_allocated_MB': 38307.75, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 9745.02, 'avg_tokens_per_second': 12286.77}
+{'loss': 7.707, 'grad_norm': 5.809014320373535, 'learning_rate': 3e-06, 'epoch': 0.05, 'num_input_tokens_seen': 625920, 'step': 11, 'step_time_sec': 3.98, 'avg_step_time_sec': 4.45, 'time_to_completion_sec': 40.04, 'estimated_total_time_sec': 88.97, 'step_peak_memory_allocated_MB': 38307.89, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 13304.21, 'avg_tokens_per_second': 12377.87}
+{'loss': 7.5828, 'grad_norm': 5.793330192565918, 'learning_rate': 2.4790554669992093e-06, 'epoch': 0.05, 'num_input_tokens_seen': 676608, 'step': 12, 'step_time_sec': 4.11, 'avg_step_time_sec': 4.42, 'time_to_completion_sec': 35.35, 'estimated_total_time_sec': 88.36, 'step_peak_memory_allocated_MB': 38307.78, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 12323.1, 'avg_tokens_per_second': 12373.23}
+{'loss': 7.5844, 'grad_norm': 6.631588459014893, 'learning_rate': 1.973939570022994e-06, 'epoch': 0.05, 'num_input_tokens_seen': 725760, 'step': 13, 'step_time_sec': 3.92, 'avg_step_time_sec': 4.38, 'time_to_completion_sec': 30.64, 'estimated_total_time_sec': 87.54, 'step_peak_memory_allocated_MB': 38307.82, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 12530.66, 'avg_tokens_per_second': 12384.99}
+{'loss': 7.6481, 'grad_norm': 9.671046257019043, 'learning_rate': 1.5000000000000007e-06, 'epoch': 0.06, 'num_input_tokens_seen': 786432, 'step': 14, 'step_time_sec': 4.48, 'avg_step_time_sec': 4.39, 'time_to_completion_sec': 26.31, 'estimated_total_time_sec': 87.7, 'step_peak_memory_allocated_MB': 38307.91, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 13530.55, 'avg_tokens_per_second': 12475.1}
+{'loss': 7.6636, 'grad_norm': 6.185126781463623, 'learning_rate': 1.0716371709403819e-06, 'epoch': 0.06, 'num_input_tokens_seen': 844800, 'step': 15, 'step_time_sec': 4.48, 'avg_step_time_sec': 4.39, 'time_to_completion_sec': 21.96, 'estimated_total_time_sec': 87.83, 'step_peak_memory_allocated_MB': 38307.91, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 13038.21, 'avg_tokens_per_second': 12516.1}
+{'loss': 7.5504, 'grad_norm': 8.093387603759766, 'learning_rate': 7.018666706430663e-07, 'epoch': 0.07, 'num_input_tokens_seen': 891648, 'step': 16, 'step_time_sec': 4.13, 'avg_step_time_sec': 4.37, 'time_to_completion_sec': 17.5, 'estimated_total_time_sec': 87.49, 'step_peak_memory_allocated_MB': 38307.84, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 11338.4, 'avg_tokens_per_second': 12441.94}
+{'loss': 7.4777, 'grad_norm': 7.326509952545166, 'learning_rate': 4.019237886466839e-07, 'epoch': 0.07, 'num_input_tokens_seen': 956928, 'step': 17, 'step_time_sec': 5.45, 'avg_step_time_sec': 4.44, 'time_to_completion_sec': 13.33, 'estimated_total_time_sec': 88.84, 'step_peak_memory_allocated_MB': 38307.82, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 11971.5, 'avg_tokens_per_second': 12405.84}
+{'loss': 7.6148, 'grad_norm': 6.828405380249023, 'learning_rate': 1.8092213764227505e-07, 'epoch': 0.07, 'num_input_tokens_seen': 1015296, 'step': 18, 'step_time_sec': 4.6, 'avg_step_time_sec': 4.45, 'time_to_completion_sec': 8.9, 'estimated_total_time_sec': 89.02, 'step_peak_memory_allocated_MB': 38307.8, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 12696.34, 'avg_tokens_per_second': 12423.49}
+{'loss': 7.5495, 'grad_norm': 7.313730239868164, 'learning_rate': 4.557674096337594e-08, 'epoch': 0.08, 'num_input_tokens_seen': 1080576, 'step': 19, 'step_time_sec': 4.95, 'avg_step_time_sec': 4.48, 'time_to_completion_sec': 4.48, 'estimated_total_time_sec': 89.58, 'step_peak_memory_allocated_MB': 38307.84, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 13179.17, 'avg_tokens_per_second': 12469.92}
+{'loss': 7.5819, 'grad_norm': 7.678485870361328, 'learning_rate': 0.0, 'epoch': 0.08, 'num_input_tokens_seen': 1138944, 'step': 20, 'step_time_sec': 5.1, 'avg_step_time_sec': 4.51, 'time_to_completion_sec': 0.0, 'estimated_total_time_sec': 90.23, 'step_peak_memory_allocated_MB': 38307.82, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 11440.95, 'avg_tokens_per_second': 12408.68}
+{'train_runtime': 95.584, 'train_samples_per_second': 40.174, 'train_steps_per_second': 0.209, 'train_loss': 8.541682934761047, 'epoch': 0.08, 'num_input_tokens_seen': 1138944, 'step': 20, 'step_time_sec': 5.1, 'avg_step_time_sec': 4.51, 'time_to_completion_sec': 0.0, 'estimated_total_time_sec': 90.23, 'step_peak_memory_allocated_MB': 38307.82, 'step_peak_memory_reserved_MB': 57366.0, 'total_peak_memory_allocated_MB': 38307.92, 'total_peak_memory_reserved_MB': 57366.0, 'step_tokens_per_second': 11440.95, 'avg_tokens_per_second': 12408.68}