{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1933, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05173305742369374, "grad_norm": 0.20534886419773102, "learning_rate": 1.0362694300518135e-05, "loss": 2.372, "step": 100 }, { "epoch": 0.10346611484738748, "grad_norm": 0.3482538163661957, "learning_rate": 1.9999201340701767e-05, "loss": 2.2663, "step": 200 }, { "epoch": 0.1551991722710812, "grad_norm": 0.5800974369049072, "learning_rate": 1.981396800312055e-05, "loss": 2.1092, "step": 300 }, { "epoch": 0.20693222969477496, "grad_norm": 0.8034837245941162, "learning_rate": 1.9309679502465225e-05, "loss": 1.9755, "step": 400 }, { "epoch": 0.2586652871184687, "grad_norm": 0.8570711016654968, "learning_rate": 1.8502730415298682e-05, "loss": 1.887, "step": 500 }, { "epoch": 0.3103983445421624, "grad_norm": 0.9559773206710815, "learning_rate": 1.741935490832257e-05, "loss": 1.8283, "step": 600 }, { "epoch": 0.3621314019658562, "grad_norm": 0.9662678837776184, "learning_rate": 1.6094773857429165e-05, "loss": 1.8393, "step": 700 }, { "epoch": 0.4138644593895499, "grad_norm": 1.23440682888031, "learning_rate": 1.4572049806032437e-05, "loss": 1.7, "step": 800 }, { "epoch": 0.4655975168132437, "grad_norm": 1.1646474599838257, "learning_rate": 1.2900686988340288e-05, "loss": 1.703, "step": 900 }, { "epoch": 0.5173305742369374, "grad_norm": 1.043607473373413, "learning_rate": 1.1135021931248375e-05, "loss": 1.6366, "step": 1000 }, { "epoch": 0.5690636316606311, "grad_norm": 1.163309931755066, "learning_rate": 9.332456956890377e-06, "loss": 1.6961, "step": 1100 }, { "epoch": 0.6207966890843248, "grad_norm": 1.1546854972839355, "learning_rate": 7.551594015228087e-06, "loss": 1.6418, "step": 1200 }, { "epoch": 0.6725297465080187, "grad_norm": 1.1681387424468994, "learning_rate": 5.850329516366368e-06, "loss": 1.6815, "step": 1300 }, { "epoch": 0.7242628039317124, "grad_norm": 1.1528677940368652, "learning_rate": 4.283972100189098e-06, "loss": 1.6221, "step": 1400 }, { "epoch": 0.7759958613554061, "grad_norm": 1.3576257228851318, "learning_rate": 2.903444535212738e-06, "loss": 1.6389, "step": 1500 }, { "epoch": 0.8277289187790998, "grad_norm": 1.1139311790466309, "learning_rate": 1.7536282034870066e-06, "loss": 1.6398, "step": 1600 }, { "epoch": 0.8794619762027935, "grad_norm": 1.1810859441757202, "learning_rate": 8.71903992855374e-07, "loss": 1.678, "step": 1700 }, { "epoch": 0.9311950336264874, "grad_norm": 1.1703745126724243, "learning_rate": 2.8693703262333894e-07, "loss": 1.6531, "step": 1800 }, { "epoch": 0.9829280910501811, "grad_norm": 1.3099007606506348, "learning_rate": 1.774478125728729e-08, "loss": 1.653, "step": 1900 }, { "epoch": 1.0, "eval_loss": 1.5814313888549805, "eval_runtime": 31.0467, "eval_samples_per_second": 13.464, "eval_steps_per_second": 1.707, "step": 1933 }, { "epoch": 1.0, "step": 1933, "total_flos": 3.52000008192e+16, "train_loss": 1.7970823928351485, "train_runtime": 655.195, "train_samples_per_second": 5.899, "train_steps_per_second": 2.95 } ], "logging_steps": 100, "max_steps": 1933, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.52000008192e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }