{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6015037593984962, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0030075187969924814, "eval_loss": 10.820727348327637, "eval_runtime": 0.858, "eval_samples_per_second": 163.161, "eval_steps_per_second": 81.58, "step": 1 }, { "epoch": 0.03007518796992481, "grad_norm": 0.24175788462162018, "learning_rate": 0.0002, "loss": 10.8226, "step": 10 }, { "epoch": 0.06015037593984962, "grad_norm": 0.24899822473526, "learning_rate": 0.0002, "loss": 10.81, "step": 20 }, { "epoch": 0.09022556390977443, "grad_norm": 0.3050399124622345, "learning_rate": 0.0002, "loss": 10.7847, "step": 30 }, { "epoch": 0.12030075187969924, "grad_norm": 0.21318531036376953, "learning_rate": 0.0002, "loss": 10.7543, "step": 40 }, { "epoch": 0.15037593984962405, "grad_norm": 0.15988190472126007, "learning_rate": 0.0002, "loss": 10.7285, "step": 50 }, { "epoch": 0.15037593984962405, "eval_loss": 10.709030151367188, "eval_runtime": 0.4624, "eval_samples_per_second": 302.784, "eval_steps_per_second": 151.392, "step": 50 }, { "epoch": 0.18045112781954886, "grad_norm": 0.12351339310407639, "learning_rate": 0.0002, "loss": 10.7157, "step": 60 }, { "epoch": 0.21052631578947367, "grad_norm": 0.10733628273010254, "learning_rate": 0.0002, "loss": 10.7116, "step": 70 }, { "epoch": 0.24060150375939848, "grad_norm": 0.09820961952209473, "learning_rate": 0.0002, "loss": 10.7077, "step": 80 }, { "epoch": 0.2706766917293233, "grad_norm": 0.08355368673801422, "learning_rate": 0.0002, "loss": 10.7058, "step": 90 }, { "epoch": 0.3007518796992481, "grad_norm": 0.08498694747686386, "learning_rate": 0.0002, "loss": 10.7083, "step": 100 }, { "epoch": 0.3007518796992481, "eval_loss": 10.692218780517578, "eval_runtime": 0.4623, "eval_samples_per_second": 302.811, "eval_steps_per_second": 151.406, "step": 100 }, { "epoch": 0.3308270676691729, "grad_norm": 0.10568591207265854, "learning_rate": 0.0002, "loss": 10.703, "step": 110 }, { "epoch": 0.3609022556390977, "grad_norm": 0.10359843820333481, "learning_rate": 0.0002, "loss": 10.6997, "step": 120 }, { "epoch": 0.39097744360902253, "grad_norm": 0.09477504342794418, "learning_rate": 0.0002, "loss": 10.7039, "step": 130 }, { "epoch": 0.42105263157894735, "grad_norm": 0.10229877382516861, "learning_rate": 0.0002, "loss": 10.694, "step": 140 }, { "epoch": 0.45112781954887216, "grad_norm": 0.13511568307876587, "learning_rate": 0.0002, "loss": 10.6899, "step": 150 }, { "epoch": 0.45112781954887216, "eval_loss": 10.677408218383789, "eval_runtime": 0.4665, "eval_samples_per_second": 300.117, "eval_steps_per_second": 150.058, "step": 150 }, { "epoch": 0.48120300751879697, "grad_norm": 0.10884358733892441, "learning_rate": 0.0002, "loss": 10.6897, "step": 160 }, { "epoch": 0.5112781954887218, "grad_norm": 0.08635402470827103, "learning_rate": 0.0002, "loss": 10.6819, "step": 170 }, { "epoch": 0.5413533834586466, "grad_norm": 0.11650286614894867, "learning_rate": 0.0002, "loss": 10.6761, "step": 180 }, { "epoch": 0.5714285714285714, "grad_norm": 0.08204352855682373, "learning_rate": 0.0002, "loss": 10.6727, "step": 190 }, { "epoch": 0.6015037593984962, "grad_norm": 0.10318433493375778, "learning_rate": 0.0002, "loss": 10.6705, "step": 200 }, { "epoch": 0.6015037593984962, "eval_loss": 10.65206527709961, "eval_runtime": 0.4867, "eval_samples_per_second": 287.665, "eval_steps_per_second": 143.833, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 32811830476800.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }