{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.006135345726731701, "eval_steps": 20, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0012270691453463403, "grad_norm": 0.07455573976039886, "learning_rate": 0.00019981588314717073, "loss": 2.6233, "step": 20 }, { "epoch": 0.0012270691453463403, "eval_loss": 2.2018821239471436, "eval_runtime": 23.583, "eval_samples_per_second": 4.24, "eval_steps_per_second": 0.551, "step": 20 }, { "epoch": 0.0024541382906926807, "grad_norm": 0.0842185765504837, "learning_rate": 0.00019957039401006504, "loss": 2.1109, "step": 40 }, { "epoch": 0.0024541382906926807, "eval_loss": 1.9889661073684692, "eval_runtime": 23.7122, "eval_samples_per_second": 4.217, "eval_steps_per_second": 0.548, "step": 40 }, { "epoch": 0.003681207436039021, "grad_norm": 0.1341264545917511, "learning_rate": 0.00019932490487295938, "loss": 1.9978, "step": 60 }, { "epoch": 0.003681207436039021, "eval_loss": 1.8857674598693848, "eval_runtime": 24.0663, "eval_samples_per_second": 4.155, "eval_steps_per_second": 0.54, "step": 60 }, { "epoch": 0.004908276581385361, "grad_norm": 0.13033552467823029, "learning_rate": 0.00019907941573585368, "loss": 1.8977, "step": 80 }, { "epoch": 0.004908276581385361, "eval_loss": 1.8246879577636719, "eval_runtime": 23.578, "eval_samples_per_second": 4.241, "eval_steps_per_second": 0.551, "step": 80 }, { "epoch": 0.006135345726731701, "grad_norm": 0.11537110805511475, "learning_rate": 0.00019883392659874802, "loss": 1.8383, "step": 100 }, { "epoch": 0.006135345726731701, "eval_loss": 1.7761367559432983, "eval_runtime": 23.7248, "eval_samples_per_second": 4.215, "eval_steps_per_second": 0.548, "step": 100 } ], "logging_steps": 20, "max_steps": 16299, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "total_flos": 6919962687590400.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }