{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.8198801550934087, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 0.0002, "loss": 1.0908, "step": 20 }, { "epoch": 0.11, "eval_loss": 0.8531244397163391, "eval_runtime": 113.8743, "eval_samples_per_second": 17.563, "eval_steps_per_second": 0.281, "step": 20 }, { "epoch": 0.23, "learning_rate": 0.0002, "loss": 0.9266, "step": 40 }, { "epoch": 0.23, "eval_loss": 0.8096991777420044, "eval_runtime": 113.8834, "eval_samples_per_second": 17.562, "eval_steps_per_second": 0.281, "step": 40 }, { "epoch": 0.34, "learning_rate": 0.0002, "loss": 0.8862, "step": 60 }, { "epoch": 0.34, "eval_loss": 0.7927151322364807, "eval_runtime": 113.7971, "eval_samples_per_second": 17.575, "eval_steps_per_second": 0.281, "step": 60 }, { "epoch": 0.45, "learning_rate": 0.0002, "loss": 0.8688, "step": 80 }, { "epoch": 0.45, "eval_loss": 0.7790905237197876, "eval_runtime": 113.7549, "eval_samples_per_second": 17.582, "eval_steps_per_second": 0.281, "step": 80 }, { "epoch": 0.56, "learning_rate": 0.0002, "loss": 0.861, "step": 100 }, { "epoch": 0.56, "eval_loss": 0.776690661907196, "eval_runtime": 113.8772, "eval_samples_per_second": 17.563, "eval_steps_per_second": 0.281, "step": 100 }, { "epoch": 0.68, "learning_rate": 0.0002, "loss": 0.8509, "step": 120 }, { "epoch": 0.68, "eval_loss": 0.7719365358352661, "eval_runtime": 113.7172, "eval_samples_per_second": 17.587, "eval_steps_per_second": 0.281, "step": 120 }, { "epoch": 0.79, "learning_rate": 0.0002, "loss": 0.8301, "step": 140 }, { "epoch": 0.79, "eval_loss": 0.760235607624054, "eval_runtime": 113.7009, "eval_samples_per_second": 17.59, "eval_steps_per_second": 0.281, "step": 140 }, { "epoch": 0.9, "learning_rate": 0.0002, "loss": 0.8126, "step": 160 }, { "epoch": 0.9, "eval_loss": 0.7536348104476929, "eval_runtime": 113.7032, "eval_samples_per_second": 17.59, "eval_steps_per_second": 0.281, "step": 160 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 0.7979, "step": 180 }, { "epoch": 1.02, "eval_loss": 0.7558956742286682, "eval_runtime": 113.7654, "eval_samples_per_second": 17.58, "eval_steps_per_second": 0.281, "step": 180 }, { "epoch": 1.13, "learning_rate": 0.0002, "loss": 0.4881, "step": 200 }, { "epoch": 1.13, "eval_loss": 0.7853934168815613, "eval_runtime": 113.7426, "eval_samples_per_second": 17.584, "eval_steps_per_second": 0.281, "step": 200 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 0.491, "step": 220 }, { "epoch": 1.24, "eval_loss": 0.7784026861190796, "eval_runtime": 113.7986, "eval_samples_per_second": 17.575, "eval_steps_per_second": 0.281, "step": 220 }, { "epoch": 1.35, "learning_rate": 0.0002, "loss": 0.489, "step": 240 }, { "epoch": 1.35, "eval_loss": 0.7843385338783264, "eval_runtime": 113.8087, "eval_samples_per_second": 17.573, "eval_steps_per_second": 0.281, "step": 240 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 0.4947, "step": 260 }, { "epoch": 1.47, "eval_loss": 0.790459156036377, "eval_runtime": 113.8164, "eval_samples_per_second": 17.572, "eval_steps_per_second": 0.281, "step": 260 }, { "epoch": 1.58, "learning_rate": 0.0002, "loss": 0.4937, "step": 280 }, { "epoch": 1.58, "eval_loss": 0.7871547937393188, "eval_runtime": 113.8459, "eval_samples_per_second": 17.568, "eval_steps_per_second": 0.281, "step": 280 }, { "epoch": 1.69, "learning_rate": 0.0002, "loss": 0.5175, "step": 300 }, { "epoch": 1.69, "eval_loss": 0.7906413674354553, "eval_runtime": 113.7575, "eval_samples_per_second": 17.581, "eval_steps_per_second": 0.281, "step": 300 }, { "epoch": 1.8, "learning_rate": 0.0002, "loss": 0.5008, "step": 320 }, { "epoch": 1.8, "eval_loss": 0.7836869955062866, "eval_runtime": 113.7644, "eval_samples_per_second": 17.58, "eval_steps_per_second": 0.281, "step": 320 }, { "epoch": 1.92, "learning_rate": 0.0002, "loss": 0.4998, "step": 340 }, { "epoch": 1.92, "eval_loss": 0.7940365672111511, "eval_runtime": 113.7944, "eval_samples_per_second": 17.576, "eval_steps_per_second": 0.281, "step": 340 }, { "epoch": 2.03, "learning_rate": 0.0002, "loss": 0.4598, "step": 360 }, { "epoch": 2.03, "eval_loss": 0.8568623065948486, "eval_runtime": 113.8293, "eval_samples_per_second": 17.57, "eval_steps_per_second": 0.281, "step": 360 }, { "epoch": 2.14, "learning_rate": 0.0002, "loss": 0.2924, "step": 380 }, { "epoch": 2.14, "eval_loss": 0.8405641913414001, "eval_runtime": 113.7058, "eval_samples_per_second": 17.589, "eval_steps_per_second": 0.281, "step": 380 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 0.3068, "step": 400 }, { "epoch": 2.26, "eval_loss": 0.8450121879577637, "eval_runtime": 113.8088, "eval_samples_per_second": 17.573, "eval_steps_per_second": 0.281, "step": 400 }, { "epoch": 2.37, "learning_rate": 0.0002, "loss": 0.2982, "step": 420 }, { "epoch": 2.37, "eval_loss": 0.8488427400588989, "eval_runtime": 113.7591, "eval_samples_per_second": 17.581, "eval_steps_per_second": 0.281, "step": 420 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 0.3092, "step": 440 }, { "epoch": 2.48, "eval_loss": 0.8430971503257751, "eval_runtime": 113.7315, "eval_samples_per_second": 17.585, "eval_steps_per_second": 0.281, "step": 440 }, { "epoch": 2.59, "learning_rate": 0.0002, "loss": 0.3053, "step": 460 }, { "epoch": 2.59, "eval_loss": 0.8562591075897217, "eval_runtime": 113.7597, "eval_samples_per_second": 17.581, "eval_steps_per_second": 0.281, "step": 460 }, { "epoch": 2.71, "learning_rate": 0.0002, "loss": 0.3029, "step": 480 }, { "epoch": 2.71, "eval_loss": 0.8492431044578552, "eval_runtime": 113.744, "eval_samples_per_second": 17.583, "eval_steps_per_second": 0.281, "step": 480 }, { "epoch": 2.82, "learning_rate": 0.0002, "loss": 0.3233, "step": 500 }, { "epoch": 2.82, "eval_loss": 0.8486313223838806, "eval_runtime": 113.729, "eval_samples_per_second": 17.586, "eval_steps_per_second": 0.281, "step": 500 } ], "max_steps": 750, "num_train_epochs": 5, "total_flos": 4.995957240616714e+17, "trial_name": null, "trial_params": null }