{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 123, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008130081300813009, "grad_norm": 5.968609245665954, "learning_rate": 3.846153846153846e-08, "loss": 1.3047, "step": 1 }, { "epoch": 0.04065040650406504, "grad_norm": 6.622098028585483, "learning_rate": 1.9230769230769231e-07, "loss": 1.3293, "step": 5 }, { "epoch": 0.08130081300813008, "grad_norm": 5.601752396412818, "learning_rate": 3.8461538461538463e-07, "loss": 1.3265, "step": 10 }, { "epoch": 0.12195121951219512, "grad_norm": 3.3465461495166955, "learning_rate": 4.995922759815338e-07, "loss": 1.3014, "step": 15 }, { "epoch": 0.16260162601626016, "grad_norm": 2.285061060375805, "learning_rate": 4.950206402730983e-07, "loss": 1.267, "step": 20 }, { "epoch": 0.2032520325203252, "grad_norm": 2.906521061514324, "learning_rate": 4.854610909098811e-07, "loss": 1.2635, "step": 25 }, { "epoch": 0.24390243902439024, "grad_norm": 2.2161224244788196, "learning_rate": 4.7110823274945357e-07, "loss": 1.2199, "step": 30 }, { "epoch": 0.2845528455284553, "grad_norm": 1.9040029033032413, "learning_rate": 4.5225424859373684e-07, "loss": 1.2142, "step": 35 }, { "epoch": 0.3252032520325203, "grad_norm": 1.8144925748816008, "learning_rate": 4.292829511897409e-07, "loss": 1.2051, "step": 40 }, { "epoch": 0.36585365853658536, "grad_norm": 1.6349825973341405, "learning_rate": 4.0266196990885955e-07, "loss": 1.1723, "step": 45 }, { "epoch": 0.4065040650406504, "grad_norm": 1.664912835299134, "learning_rate": 3.72933231161401e-07, "loss": 1.177, "step": 50 }, { "epoch": 0.44715447154471544, "grad_norm": 1.6158486513675248, "learning_rate": 3.407019263376602e-07, "loss": 1.1748, "step": 55 }, { "epoch": 0.4878048780487805, "grad_norm": 1.661677150645168, "learning_rate": 3.0662419185644114e-07, "loss": 1.1791, "step": 60 }, { "epoch": 0.5284552845528455, "grad_norm": 1.5725357820334132, "learning_rate": 2.7139375211970995e-07, "loss": 1.1598, "step": 65 }, { "epoch": 0.5691056910569106, "grad_norm": 1.621045635617722, "learning_rate": 2.3572779728430797e-07, "loss": 1.166, "step": 70 }, { "epoch": 0.6097560975609756, "grad_norm": 1.6041377989080807, "learning_rate": 2.0035238333856368e-07, "loss": 1.1698, "step": 75 }, { "epoch": 0.6504065040650406, "grad_norm": 1.6387599262025716, "learning_rate": 1.6598765169614244e-07, "loss": 1.1467, "step": 80 }, { "epoch": 0.6910569105691057, "grad_norm": 1.5728900163922992, "learning_rate": 1.3333316919358157e-07, "loss": 1.1588, "step": 85 }, { "epoch": 0.7317073170731707, "grad_norm": 1.630523905646432, "learning_rate": 1.0305368692688174e-07, "loss": 1.1511, "step": 90 }, { "epoch": 0.7723577235772358, "grad_norm": 1.6142247634044153, "learning_rate": 7.576560783617667e-08, "loss": 1.1514, "step": 95 }, { "epoch": 0.8130081300813008, "grad_norm": 1.6635344099346825, "learning_rate": 5.202443851943125e-08, "loss": 1.1339, "step": 100 }, { "epoch": 0.8536585365853658, "grad_norm": 1.6142940019540397, "learning_rate": 3.231348072005574e-08, "loss": 1.1436, "step": 105 }, { "epoch": 0.8943089430894309, "grad_norm": 1.7064997556493062, "learning_rate": 1.7033992697136928e-08, "loss": 1.1522, "step": 110 }, { "epoch": 0.9349593495934959, "grad_norm": 1.5806639080165765, "learning_rate": 6.497020764416633e-09, "loss": 1.1297, "step": 115 }, { "epoch": 0.975609756097561, "grad_norm": 1.649978945004722, "learning_rate": 9.170672843271666e-10, "loss": 1.1588, "step": 120 }, { "epoch": 1.0, "eval_loss": 1.137434482574463, "eval_runtime": 4.7008, "eval_samples_per_second": 108.067, "eval_steps_per_second": 1.702, "step": 123 }, { "epoch": 1.0, "step": 123, "total_flos": 51507395297280.0, "train_loss": 1.1927482141711847, "train_runtime": 664.3286, "train_samples_per_second": 23.68, "train_steps_per_second": 0.185 } ], "logging_steps": 5, "max_steps": 123, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 51507395297280.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }