| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.6339129420002334, | |
| "eval_steps": 250, | |
| "global_step": 1750, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.2333994631812347, | |
| "grad_norm": 3.2987656593322754, | |
| "learning_rate": 8.978253703087328e-05, | |
| "loss": 0.2749, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2333994631812347, | |
| "eval_loss": 0.15301218628883362, | |
| "eval_runtime": 38.0307, | |
| "eval_samples_per_second": 2.288, | |
| "eval_steps_per_second": 2.288, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4667989263624694, | |
| "grad_norm": 3.038522958755493, | |
| "learning_rate": 5.813544178200335e-05, | |
| "loss": 0.115, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4667989263624694, | |
| "eval_loss": 0.12411840260028839, | |
| "eval_runtime": 37.9989, | |
| "eval_samples_per_second": 2.29, | |
| "eval_steps_per_second": 2.29, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7001983895437041, | |
| "grad_norm": 2.7000675201416016, | |
| "learning_rate": 2.2048719230965166e-05, | |
| "loss": 0.1008, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7001983895437041, | |
| "eval_loss": 0.11009757965803146, | |
| "eval_runtime": 38.0347, | |
| "eval_samples_per_second": 2.287, | |
| "eval_steps_per_second": 2.287, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.9335978527249388, | |
| "grad_norm": 2.124303102493286, | |
| "learning_rate": 1.2154116213611399e-06, | |
| "loss": 0.0911, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9335978527249388, | |
| "eval_loss": 0.10688204318284988, | |
| "eval_runtime": 37.9463, | |
| "eval_samples_per_second": 2.293, | |
| "eval_steps_per_second": 2.293, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.167114015637764, | |
| "grad_norm": 2.2942659854888916, | |
| "learning_rate": 3.915853581228413e-05, | |
| "loss": 0.7993, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.167114015637764, | |
| "eval_loss": 0.1208883598446846, | |
| "eval_runtime": 21.9734, | |
| "eval_samples_per_second": 3.959, | |
| "eval_steps_per_second": 3.959, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.4005134788189988, | |
| "grad_norm": 1.147830843925476, | |
| "learning_rate": 2.1903963223439395e-05, | |
| "loss": 0.7032, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.4005134788189988, | |
| "eval_loss": 0.10965924710035324, | |
| "eval_runtime": 22.0058, | |
| "eval_samples_per_second": 3.954, | |
| "eval_steps_per_second": 3.954, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.6339129420002334, | |
| "grad_norm": 2.2439823150634766, | |
| "learning_rate": 8.619209196560924e-06, | |
| "loss": 0.6596, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.6339129420002334, | |
| "eval_loss": 0.10602504760026932, | |
| "eval_runtime": 22.0084, | |
| "eval_samples_per_second": 3.953, | |
| "eval_steps_per_second": 3.953, | |
| "step": 1750 | |
| } | |
| ], | |
| "logging_steps": 250, | |
| "max_steps": 2142, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 250, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.415876128180654e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |