{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 7430, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3364737550471063, "grad_norm": 0.923697829246521, "learning_rate": 0.00046635262449528937, "loss": 1.9724, "step": 500 }, { "epoch": 0.6729475100942126, "grad_norm": 0.9243040084838867, "learning_rate": 0.0004327052489905787, "loss": 1.4423, "step": 1000 }, { "epoch": 1.0, "eval_loss": 1.0461400747299194, "eval_runtime": 36.2574, "eval_samples_per_second": 81.942, "eval_steps_per_second": 10.26, "step": 1486 }, { "epoch": 1.009421265141319, "grad_norm": 0.9555139541625977, "learning_rate": 0.0003990578734858681, "loss": 1.2854, "step": 1500 }, { "epoch": 1.3458950201884252, "grad_norm": 0.8507774472236633, "learning_rate": 0.0003654104979811575, "loss": 0.9929, "step": 2000 }, { "epoch": 1.6823687752355316, "grad_norm": 1.1206731796264648, "learning_rate": 0.00033176312247644685, "loss": 0.9408, "step": 2500 }, { "epoch": 2.0, "eval_loss": 0.9026183485984802, "eval_runtime": 36.3158, "eval_samples_per_second": 81.81, "eval_steps_per_second": 10.243, "step": 2972 }, { "epoch": 2.018842530282638, "grad_norm": 0.7318525910377502, "learning_rate": 0.0002981157469717362, "loss": 0.8886, "step": 3000 }, { "epoch": 2.3553162853297445, "grad_norm": 1.1639642715454102, "learning_rate": 0.00026446837146702556, "loss": 0.6969, "step": 3500 }, { "epoch": 2.6917900403768504, "grad_norm": 0.7347049117088318, "learning_rate": 0.00023082099596231497, "loss": 0.692, "step": 4000 }, { "epoch": 3.0, "eval_loss": 0.8661695122718811, "eval_runtime": 36.5469, "eval_samples_per_second": 81.293, "eval_steps_per_second": 10.179, "step": 4458 }, { "epoch": 3.028263795423957, "grad_norm": 0.7746924757957458, "learning_rate": 0.00019717362045760433, "loss": 0.6564, "step": 4500 }, { "epoch": 3.3647375504710633, "grad_norm": 0.7316901087760925, "learning_rate": 0.00016352624495289368, "loss": 0.4934, "step": 5000 }, { "epoch": 3.7012113055181697, "grad_norm": 0.9040531516075134, "learning_rate": 0.00012987886944818307, "loss": 0.5261, "step": 5500 }, { "epoch": 4.0, "eval_loss": 0.8571327924728394, "eval_runtime": 36.3399, "eval_samples_per_second": 81.756, "eval_steps_per_second": 10.237, "step": 5944 }, { "epoch": 4.037685060565276, "grad_norm": 0.9058707356452942, "learning_rate": 9.623149394347241e-05, "loss": 0.4785, "step": 6000 }, { "epoch": 4.3741588156123825, "grad_norm": 0.7362410426139832, "learning_rate": 6.258411843876178e-05, "loss": 0.3714, "step": 6500 }, { "epoch": 4.710632570659489, "grad_norm": 0.6890231370925903, "learning_rate": 2.8936742934051144e-05, "loss": 0.3846, "step": 7000 } ], "logging_steps": 500, "max_steps": 7430, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7016439606285312.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }